[gcc r15-9238] simplify-rtx: Fix shortcut for vector eq/ne

2025-04-07 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:546f28f83ceba74dc8bf84b0435c0159ffca971a

commit r15-9238-g546f28f83ceba74dc8bf84b0435c0159ffca971a
Author: Richard Sandiford 
Date:   Mon Apr 7 08:03:46 2025 +0100

simplify-rtx: Fix shortcut for vector eq/ne

This patch forestalls a regression in gcc.dg/rtl/x86_64/vector_eq.c
with the patch for PR116398.  The test wants:

  (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int 0) 
(const_int 0) (const_int 0) (const_int 0)])))
  (cinsn 5 (set (reg:V4SI <2>)
(eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>))))

to be folded to a vector of -1s.  One unusual thing about the fold
is that the <1> in the second insn is uninitialised; it looks like
it should be replaced by <0>, or that there should be an insn 4 that
copies <0> to <1>.

As it stands, the test relies on init-regs to insert a zero
initialisation of <1>.  This happens after all the cse/pre/fwprop
stuff, with only dce passes between init-regs and combine.
Combine therefore sees:

(insn 3 2 8 2 (set (reg:V4SI 98)
(const_vector:V4SI [
(const_int 0 [0]) repeated x4
])) 2403 {movv4si_internal}
 (nil))
(insn 8 3 9 2 (clobber (reg:V4SI 99)) -1
 (nil))
(insn 9 8 5 2 (set (reg:V4SI 99)
(const_vector:V4SI [
(const_int 0 [0]) repeated x4
])) -1
 (nil))
(insn 5 9 7 2 (set (reg:V4SI 100)
(eq:V4SI (reg:V4SI 98)
(reg:V4SI 99))) 7874 {*sse2_eqv4si3}
 (expr_list:REG_DEAD (reg:V4SI 99)
(expr_list:REG_DEAD (reg:V4SI 98)
(expr_list:REG_EQUAL (eq:V4SI (const_vector:V4SI [
(const_int 0 [0]) repeated x4
])
(reg:V4SI 99))
(nil)))))

It looks like the test should then pass through a 3, 9 -> 5 combination,
so that we get an (eq ...) between two zeros and fold it to a vector
of -1s.  But although the combination is attempted, the fold doesn't
happen.  Instead, combine is left to match the unsimplified (eq ...)
between two zeros, which rightly fails.  The test only passes because
late_combine2 happens to try simplifying an (eq ...) between reg X and
reg X, which does fold to a vector of -1s.

The different handling of registers and constants is due to this
code in simplify_const_relational_operation:

  if (INTEGRAL_MODE_P (mode) && trueop1 != const0_rtx
  && (code == EQ || code == NE)
  && ! ((REG_P (op0) || CONST_INT_P (trueop0))
&& (REG_P (op1) || CONST_INT_P (trueop1)))
  && (tem = simplify_binary_operation (MINUS, mode, op0, op1)) != 0
  /* We cannot do this if tem is a nonzero address.  */
  && ! nonzero_address_p (tem))
return simplify_const_relational_operation (signed_condition (code),
mode, tem, const0_rtx);

INTEGRAL_MODE_P matches vector integer modes, but everything else
about the condition is written for scalar integers only.  Thus if
trueop0 and trueop1 are equal vector constants, we'll bypass all
the exclusions and try simplifying a subtraction.  This will succeed,
giving a vector of zeros.  The recursive call will then try to simplify
a comparison between the vector of zeros and const0_rtx, which isn't
well-formed.  Luckily or unluckily, the ill-formedness doesn't trigger
an ICE, but it does prevent any simplification from happening.

The least-effort fix would be to replace INTEGRAL_MODE_P with
SCALAR_INT_MODE_P.  But the fold does make conceptual sense for
vectors too, so it seemed better to keep the INTEGRAL_MODE_P and
generalise the rest of the condition to match.

gcc/
* simplify-rtx.cc (simplify_const_relational_operation): Generalize
the constant checks in the fold-via-minus path to match the
INTEGRAL_MODE_P condition.

Diff:
---
 gcc/simplify-rtx.cc | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index fe007bc7d96a..6f969effdf99 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -6657,15 +6657,20 @@ simplify_const_relational_operation (enum rtx_code code,
  we do not know the signedness of the operation on either the left or
  the right hand side of the comparison.  */
 
-  if (INTEGRAL_MODE_P (mode) && trueop1 != const0_rtx
+  if (INTEGRAL_MODE_P (mode)
+  && trueop1 != CONST0_RTX (mode)
   && (code == EQ || code == NE)
-  && ! ((REG_P (op0) || CONST_INT_P (trueop0))
-   && (REG_P (op1) || CONST_INT_P (trueop1)))
+  && ! ((REG_P (op0)
+|| CONST_SCALAR_INT_P (trueop0)
+|| CONST_VECTOR_P (trueop0))
+   && (REG_P (op1)
+  

[gcc r15-9241] combine: Optimise distribute_links search [PR116398]

2025-04-07 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:107a1b2126ceb42a79edbc388863c868bd4fbc2e

commit r15-9241-g107a1b2126ceb42a79edbc388863c868bd4fbc2e
Author: Richard Sandiford 
Date:   Mon Apr 7 08:03:48 2025 +0100

combine: Optimise distribute_links search [PR116398]

Another problem in PR101523 was that, after each successful 2->2
combination attempt, distribute_links would search further and further
for the next combinable use of the i2 destination.  Each search would
start at i2 itself, making the search quadratic in the worst case.

In a 2->2 combination, if i2 is unchanged, the search can start at i3
instead of i2.  The same thing applies to i2 when distributing i2's
links, since the only changes to earlier instructions are the deletion
of i0 and i1.

This change, combined with the previous split_i2i3 patch, gives a
34.6% speedup in combine for the testcase in PR101523.  Combine
goes from being 41% to 34% of compile time.

gcc/
PR testsuite/116398
* combine.cc (distribute_links): Take an optional start point.
(try_combine): If only i3 has changed, only distribute i3's links,
not i2's.  Start the search for the new use from i3 rather than
from the definition instruction.  Likewise start the search for
the new use from i2 when distributing i2's links.

Diff:
---
 gcc/combine.cc | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index e29cff7147d9..e99b064c98d4 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -472,7 +472,7 @@ static void move_deaths (rtx, rtx, int, rtx_insn *, rtx *);
 static bool reg_bitfield_target_p (rtx, rtx);
 static void distribute_notes (rtx, rtx_insn *, rtx_insn *, rtx_insn *,
  rtx, rtx, rtx);
-static void distribute_links (struct insn_link *);
+static void distribute_links (struct insn_link *, rtx_insn * = nullptr);
 static void mark_used_regs_combine (rtx);
 static void record_promoted_value (rtx_insn *, rtx);
 static bool unmentioned_reg_p (rtx, rtx);
@@ -4592,10 +4592,15 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
NULL_RTX, NULL_RTX, NULL_RTX);
   }
 
-distribute_links (i3links);
-distribute_links (i2links);
-distribute_links (i1links);
-distribute_links (i0links);
+if (only_i3_changed)
+  distribute_links (i3links, i3);
+else
+  {
+   distribute_links (i3links);
+   distribute_links (i2links, i2);
+   distribute_links (i1links);
+   distribute_links (i0links);
+  }
 
 if (REG_P (i2dest))
   {
@@ -14986,10 +14991,13 @@ distribute_notes (rtx notes, rtx_insn *from_insn, 
rtx_insn *i3, rtx_insn *i2,
 
 /* Similarly to above, distribute the LOG_LINKS that used to be present on
I3, I2, and I1 to new locations.  This is also called to add a link
-   pointing at I3 when I3's destination is changed.  */
+   pointing at I3 when I3's destination is changed.
+
+   If START is nonnull and an insn, we know that the next location for each
+   link is no earlier than START.  */
 
 static void
-distribute_links (struct insn_link *links)
+distribute_links (struct insn_link *links, rtx_insn *start)
 {
   struct insn_link *link, *next_link;
 
@@ -15055,7 +15063,10 @@ distribute_links (struct insn_link *links)
 I3 to I2.  Also note that not much searching is typically done here
 since most links don't point very far away.  */
 
-  for (insn = NEXT_INSN (link->insn);
+  insn = start;
+  if (!insn || NOTE_P (insn))
+   insn = NEXT_INSN (link->insn);
+  for (;
   (insn && (this_basic_block->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
 || BB_HEAD (this_basic_block->next_bb) != insn));
   insn = NEXT_INSN (insn))


[gcc r15-9240] combine: Avoid split_i2i3 search if i2 is unchanged [PR116398]

2025-04-07 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:30a4eedfdbfeb1caaadd43738cbb0b49d7fa120b

commit r15-9240-g30a4eedfdbfeb1caaadd43738cbb0b49d7fa120b
Author: Richard Sandiford 
Date:   Mon Apr 7 08:03:47 2025 +0100

combine: Avoid split_i2i3 search if i2 is unchanged [PR116398]

When combining a single-set i2 into a multi-set i3, combine
first tries to match the new multi-set in-place.  If that fails,
combine considers splitting the multi-set so that one set goes in
i2 and the other set stays in i3.  That moves a destination from i3
to i2 and so combine needs to update any associated log link for that
destination to point to i2 rather than i3.

However, that kind of split can also occur for 2->2 combinations.
For a 2-instruction combination in which i2 doesn't die in i3, combine
tries a 2->1 combination by turning i3 into a parallel of the original
i2 and the combined i3.  If that fails, combine will split the parallel
as above, so that the first set goes in i2 and the second set goes in i3.
But that can often leave i2 unchanged, meaning that no destinations have
moved and so no search is necessary.

gcc/
PR testsuite/116398
* combine.cc (try_combine): Shortcut the split_i2i3 handling if
i2 is unchanged.

Diff:
---
 gcc/combine.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 65a87a45b3be..e29cff7147d9 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -4212,6 +4212,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
 
   bool only_i3_changed = !i0 && !i1 && rtx_equal_p (newi2pat, PATTERN (i2));
 
+  /* If only i3 has changed, any split of the combined instruction just
+ restored i2 to its original state.  No destinations moved from i3
+ to i2.  */
+  if (only_i3_changed)
+split_i2i3 = false;
+
   /* We now know that we can do this combination.  Merge the insns and
  update the status of registers and LOG_LINKS.  */


[gcc r15-9242] combine: Limit insn searches for 2->2 combinations [PR116398]

2025-04-07 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:a1a0026c659196928113bad1c7889f5ca0999d06

commit r15-9242-ga1a0026c659196928113bad1c7889f5ca0999d06
Author: Richard Sandiford 
Date:   Mon Apr 7 08:03:49 2025 +0100

combine: Limit insn searches for 2->2 combinations [PR116398]

As noted in the previous patch, combine still takes >30% of
compile time in the original testcase for PR101523.  The problem
is that try_combine uses linear insn searches for some dataflow
queries, so in the worst case, an unlimited number of 2->2
combinations for the same i2 can lead to quadratic behaviour.

This patch limits distribute_links to a certain number
of instructions when i2 is unchanged.  As Segher said in the PR trail,
it would make more conceptual sense to apply the limit unconditionally,
but I thought it would be better to change as little as possible at
this development stage.  Logically, in stage 1, the --param should
be applied directly by distribute_links with no input from callers.

As I mentioned in:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116398#c28

I think it's safe to drop log links even if a use exists.  All
processing of log links seems to handle the absence of a link
for a particular register in a conservative way.

The initial set-up errs on the side of dropping links, since for example
create_log_links has:

 /* flow.c claimed:

 We don't build a LOG_LINK for hard registers contained
 in ASM_OPERANDs.  If these registers get replaced,
 we might wind up changing the semantics of the insn,
 even if reload can make what appear to be valid
 assignments later.  */
  if (regno < FIRST_PSEUDO_REGISTER
  && asm_noperands (PATTERN (use_insn)) >= 0)
continue;

which excludes combinations by dropping log links, rather than during
try_combine.  And:

  /* If this register is being initialized using itself, and the
 register is uninitialized in this basic block, and there are
 no LOG_LINKS which set the register, then part of the
 register is uninitialized.  In that case we can't assume
 anything about the number of nonzero bits.

 ??? We could do better if we checked this in
 reg_{nonzero_bits,num_sign_bit_copies}_for_combine.  Then we
 could avoid making assumptions about the insn which initially
 sets the register, while still using the information in other
 insns.  We would have to be careful to check every insn
 involved in the combination.  */

  if (insn
  && reg_referenced_p (x, PATTERN (insn))
  && !REGNO_REG_SET_P (DF_LR_IN (BLOCK_FOR_INSN (insn)),
   REGNO (x)))
{
  struct insn_link *link;

  FOR_EACH_LOG_LINK (link, insn)
if (dead_or_set_p (link->insn, x))
  break;
  if (!link)
{
  rsp->nonzero_bits = GET_MODE_MASK (mode);
  rsp->sign_bit_copies = 1;
  return;
}
}

treats the lack of a log link as a possible sign of uninitialised data,
but that would be a missed optimisation rather than a correctness issue.

One question is what the default --param value should be.  I went with
Jakub's suggestion of 3000 from:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116398#c25

Also, to answer Jakub's question in that comment, I tried bisecting:

  int limit = atoi (getenv ("BISECT"));

(so applying the limit for all calls from try_combine) with an
abort in distribute_links if the limit caused a link to be skipped.
The minimum BISECT value that allowed an aarch64-linux-gnu bootstrap
to succeed with --enable-languages=all --enable-checking=yes,rtl,extra
was 142, so much lower than the parameter value.  I realised too late
that --enable-checking=release would probably have been a more
interesting test.

The previous patch meant that distribute_links itself is now linear
for a given i2 definition, since each search starts at the previous
last use, rather than at i2 itself.  This means that the limit has
to be applied cumulatively across all searches for the same link.

The patch does that by storing a counter in the insn_link structure.
There was a 32-bit hole there on LP64 hosts.

gcc/
PR testsuite/116398
* params.opt (-param=max-combine-search-insns=): New param.
* doc/invoke.texi: Document it.
* combine.cc (insn_link::insn_count): New field.
(alloc_insn_link): Initialize it.
(distribut

[gcc r15-9239] combine: Allow 2->2 combinations, but with a tweak [PR116398]

2025-04-07 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:4d7a634f6d41029811cdcbd5f7282b5b07890094

commit r15-9239-g4d7a634f6d41029811cdcbd5f7282b5b07890094
Author: Richard Sandiford 
Date:   Mon Apr 7 08:03:47 2025 +0100

combine: Allow 2->2 combinations, but with a tweak [PR116398]

One of the problems in PR101523 was that, after each successful
2->2 combination attempt, try_combine would restart combination
attempts at i2 even if i2 hadn't changed.  This led to quadratic
behaviour as the same failed combinations between i2 and i3 were
tried repeatedly.

The original patch for the PR dealt with that by disallowing 2->2
combinations.  However, that led to various optimisation regressions,
so there was interest in allowing the combinations again, at least
until an alternative way of getting the same results is in place.

This patch is a variant of Richi's in:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101523#c53

but limited to when we're combining 2 instructions.

This speeds up combine by 10x on the original PR101523 testcase
and reduces combine's memory footprint by 100x.

gcc/
PR testsuite/116398
* combine.cc (try_combine): Reallow 2->2 combinations.  Detect when
only i3 has changed and restart from i3 in that case.

gcc/testsuite/
* gcc.target/aarch64/popcnt-le-1.c: Account for commutativity of 
TST.
* gcc.target/aarch64/popcnt-le-3.c: Likewise AND.
* gcc.target/aarch64/pr100056.c: Revert previous patch.
* gcc.target/aarch64/sve/pred-not-gen-1.c: Likewise.
* gcc.target/aarch64/sve/pred-not-gen-4.c: Likewise.
* gcc.target/aarch64/sve/var_stride_2.c: Likewise.
* gcc.target/aarch64/sve/var_stride_4.c: Likewise.

Co-authored-by: Richard Biener 

Diff:
---
 gcc/combine.cc| 14 ++++----------
 gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c|  4 ++--
 gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c|  4 ++--
 gcc/testsuite/gcc.target/aarch64/pr100056.c   |  4 +++-
 gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c |  4 ++--
 gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c |  4 ++--
 gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c   |  3 ++-
 gcc/testsuite/gcc.target/aarch64/sve/var_stride_4.c   |  3 ++-
 8 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 1b6c4e314cc9..65a87a45b3be 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -4210,16 +4210,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
   adjust_for_new_dest (i3);
 }
 
-  /* If I2 didn't change, this is not a combination (but a simplification or
- canonicalisation with context), which should not be done here.  Doing
- it here explodes the algorithm.  Don't.  */
-  if (rtx_equal_p (newi2pat, PATTERN (i2)))
-{
-  if (dump_file)
-   fprintf (dump_file, "i2 didn't change, not doing this\n");
-  undo_all ();
-  return 0;
-}
+  bool only_i3_changed = !i0 && !i1 && rtx_equal_p (newi2pat, PATTERN (i2));
 
   /* We now know that we can do this combination.  Merge the insns and
  update the status of registers and LOG_LINKS.  */
@@ -4787,6 +4778,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
   combine_successes++;
   undo_commit ();
 
+  if (only_i3_changed)
+return i3;
+
   rtx_insn *ret = newi2pat ? i2 : i3;
   if (added_links_insn && DF_INSN_LUID (added_links_insn) < DF_INSN_LUID (ret))
 ret = added_links_insn;
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c 
b/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c
index b4141da982c9..843fdac9fd8e 100644
--- a/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c
@@ -8,7 +8,7 @@
 /*
 ** le32:
 ** sub w([0-9]+), w0, #1
-** tst w0, w\1
+** tst (?:w0, w\1|w\1, w0)
 ** csetw0, eq
 ** ret
 */
@@ -20,7 +20,7 @@ unsigned le32 (const unsigned int a) {
 /*
 ** gt32:
 ** sub w([0-9]+), w0, #1
-** tst w0, w\1
+** tst (?:w0, w\1|w\1, w0)
 ** csetw0, ne
 ** ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c 
b/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c
index b811e6f6e8fe..3b558e95d819 100644
--- a/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c
@@ -8,7 +8,7 @@
 /*
 ** le16:
 ** sub w([0-9]+), w0, #1
-** and w([0-9]+), w0, w\1
+** and w([0-9]+), (?:w0, w\1|w\1, w0)
 ** tst w\2, 65535
 ** csetw0, eq
 ** ret
@@ -21,7 +21,7 @@ unsigned le16 (const unsigned short a) {
 /*
 ** gt16:
 ** sub w([0-9]+), w0, #1
-** and w([0-9]+), w0, w\1
+** and w([0-9]+), (?:w0, w\1|w\1, w0)
 ** tst w\2, 65535
 ** csetw0, ne
 ** ret
diff --git

[gcc r15-9243] libgomp.texi: Add GCN doc for omp_target_memcpy_rect

2025-04-07 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:0c63c7524bd523ea82933e90689b63d80e16d67e

commit r15-9243-g0c63c7524bd523ea82933e90689b63d80e16d67e
Author: Tobias Burnus 
Date:   Mon Apr 7 09:04:53 2025 +0200

libgomp.texi: Add GCN doc for omp_target_memcpy_rect

libgomp/ChangeLog:

* libgomp.texi (omp_target_memcpy_rect_async,
omp_target_memcpy_rect): Add @ref to 'Offload-Target Specifics'.
(AMD Radeon (GCN)): Document how memcpy_rect is implemented.
(nvptx): Move item about memcpy_rect down; use present tense.

Diff:
---
 libgomp/libgomp.texi | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 4217c29dd377..fed9d5efb6af 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -2316,7 +2316,7 @@ the initial device.
 @end multitable
 
 @item @emph{See also}:
-@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy}
+@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy}, 
@ref{Offload-Target Specifics}
 
 @item @emph{Reference}:
 @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.6
@@ -2391,7 +2391,7 @@ the initial device.
 @end multitable
 
 @item @emph{See also}:
-@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async}
+@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async}, 
@ref{Offload-Target Specifics}
 
 @item @emph{Reference}:
 @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.8
@@ -6911,6 +6911,11 @@ The implementation remark:
   @code{omp_thread_mem_alloc}, all use low-latency memory as first
   preference, and fall back to main graphics memory when the low-latency
   pool is exhausted.
+@item The OpenMP routines @code{omp_target_memcpy_rect} and
+  @code{omp_target_memcpy_rect_async} and the @code{target update}
+  directive for non-contiguous list items use the 3D memory-copy function
+  of the HSA library.  Higher dimensions call this function in a loop and
+  are therefore supported.
 @item The unique identifier (UID), used with OpenMP's API UID routines, is the
   value returned by the HSA runtime library for 
@code{HSA_AMD_AGENT_INFO_UUID}.
   For GPUs, it is currently @samp{GPU-} followed by 16 lower-case hex 
digits,
@@ -7048,11 +7053,6 @@ The implementation remark:
   devices (``host fallback'').
 @item The default per-warp stack size is 128 kiB; see also @code{-msoft-stack}
   in the GCC manual.
-@item The OpenMP routines @code{omp_target_memcpy_rect} and
-  @code{omp_target_memcpy_rect_async} and the @code{target update}
-  directive for non-contiguous list items will use the 2D and 3D
-  memory-copy functions of the CUDA library.  Higher dimensions will
-  call those functions in a loop and are therefore supported.
 @item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the
   the @code{access} trait is set to @code{cgroup}, and libgomp has
   been built for PTX ISA version 4.1 or higher (such as in GCC's
@@ -7070,6 +7070,11 @@ The implementation remark:
   @code{omp_thread_mem_alloc}, all use low-latency memory as first
   preference, and fall back to main graphics memory when the low-latency
   pool is exhausted.
+@item The OpenMP routines @code{omp_target_memcpy_rect} and
+  @code{omp_target_memcpy_rect_async} and the @code{target update}
+  directive for non-contiguous list items use the 2D and 3D memory-copy
+  functions of the CUDA library.  Higher dimensions call those functions
+  in a loop and are therefore supported.
 @item The unique identifier (UID), used with OpenMP's API UID routines, 
consists
   of the @samp{GPU-} prefix followed by the 16-bytes UUID as returned by
   the CUDA runtime library.  This UUID is output in grouped lower-case


[gcc r15-9264] Update gcc sv.po

2025-04-07 Thread Joseph Myers via Gcc-cvs
https://gcc.gnu.org/g:e1ff4a65f33c9f82256823ce108d22c01b820614

commit r15-9264-ge1ff4a65f33c9f82256823ce108d22c01b820614
Author: Joseph Myers 
Date:   Mon Apr 7 21:53:49 2025 +0000

Update gcc sv.po

* sv.po: Update.

Diff:
---
 gcc/po/sv.po | 794 +++
 1 file changed, 307 insertions(+), 487 deletions(-)

diff --git a/gcc/po/sv.po b/gcc/po/sv.po
index a8078133838f..af153f722f72 100644
--- a/gcc/po/sv.po
+++ b/gcc/po/sv.po
@@ -32,7 +32,7 @@ msgstr ""
 "Project-Id-Version: gcc 15.1-b20250316\n"
 "Report-Msgid-Bugs-To: https://gcc.gnu.org/bugs/\n";
 "POT-Creation-Date: 2025-03-14 22:06+\n"
-"PO-Revision-Date: 2025-03-30 15:46+0200\n"
+"PO-Revision-Date: 2025-04-06 09:20+0200\n"
 "Last-Translator: Göran Uddeborg \n"
 "Language-Team: Swedish \n"
 "Language: sv\n"
@@ -50585,10 +50585,8 @@ msgid "array initialized from parenthesized string 
constant"
 msgstr "vektor initierad från strängkonstant inom parentes"
 
 #: c/c-typeck.cc:7617
-#, fuzzy
-#| msgid "expected %qT but argument is of type %qT"
 msgid "expected %e but argument is of type %e"
-msgstr "%qT förväntades men argumentet har typ %qT"
+msgstr "%e förväntades men argumentet är av typen %e"
 
 #: c/c-typeck.cc:7651
 #, gcc-internal-format
@@ -50606,10 +50604,8 @@ msgid "built-in %qD declared here"
 msgstr "den inbyggda %qD är deklarerad här"
 
 #: c/c-typeck.cc:7716 c/c-typeck.cc:8624
-#, fuzzy
-#| msgid " declared here"
 msgid "%e declared here"
-msgstr " deklarerad här"
+msgstr "%e deklarerad här"
 
 #: c/c-typeck.cc:7909
 #, gcc-internal-format
@@ -50799,40 +50795,28 @@ msgid "passing argument %d of %qE from incompatible 
pointer type"
 msgstr "skickar argument %d till %qE från inkompatibel pekartyp"
 
 #: c/c-typeck.cc:8566
-#, fuzzy
-#| msgid "assignment to %qT from pointer to %qD with incompatible type %qT"
 msgid "assignment to %e from pointer to %qD with incompatible type %e"
-msgstr "tilldelning till %qT från pekare till %qD med inkompatibel typ %qT"
+msgstr "tilldelning till %e från pekare till %qD med inkompatibel typ %e"
 
 #: c/c-typeck.cc:8572
-#, fuzzy
-#| msgid "assignment to %qT from incompatible pointer type %qT"
 msgid "assignment to %e from incompatible pointer type %e"
-msgstr "tilldelning till %qT från inkompatibel pekartyp %qT"
+msgstr "tilldelning till %e från inkompatibel pekartyp %e"
 
 #: c/c-typeck.cc:8581
-#, fuzzy
-#| msgid "initialization of %qT from pointer to %qD with incompatible type %qT"
 msgid "initialization of %e from pointer to %qD with incompatible type %e"
-msgstr "initiering av %qT från pekare till %qD med inkompatibel typ %qT"
+msgstr "initiering av %e från pekare till %qD med inkompatibel typ %e"
 
 #: c/c-typeck.cc:8587
-#, fuzzy
-#| msgid "initialization of %qT from incompatible pointer type %qT"
 msgid "initialization of %e from incompatible pointer type %e"
-msgstr "initiering av %qT från inkompatibel pekartyp %qT"
+msgstr "initiering av %e från inkompatibel pekartyp %e"
 
 #: c/c-typeck.cc:8595
-#, fuzzy
-#| msgid "returning pointer to %qD of type %qT from a function with 
incompatible type %qT"
 msgid "returning pointer to %qD of type %e from a function with incompatible 
type %e"
-msgstr "returnerar pekare till %qD av typen %qT från en funktion med 
inkompatibel typ %qT"
+msgstr "returnerar pekare till %qD av typen %e från en funktion med 
inkompatibel typ %e"
 
 #: c/c-typeck.cc:8601
-#, fuzzy
-#| msgid "returning %qT from a function with incompatible return type %qT"
 msgid "returning %e from a function with incompatible return type %e"
-msgstr "returnerar %qT från en funktion med inkompatibel returtyp %qT"
+msgstr "returnerar %e från en funktion med inkompatibel returtyp %e"
 
 #: c/c-typeck.cc:8677
 #, gcc-internal-format
@@ -50940,16 +50924,14 @@ msgid "cannot initialize array of %qT from a string 
literal with type array of %
 msgstr "det går inte att initiera vektorn av %qT från en stränglitteral med 
typvektor %qT"
 
 #: c/c-typeck.cc:9380
-#, fuzzy, gcc-internal-format
-#| msgid "initializer-string for %qT is too long"
+#, gcc-internal-format
 msgid "initializer-string for array of %qT is too long (%wu chars into %wu 
available)"
-msgstr "initierarsträng för %qT är för lång"
+msgstr "initierarsträng för en vektor av %qT är för lång (%wu tecken in i %wu 
tillgängliga)"
 
 #: c/c-typeck.cc:9385
-#, fuzzy, gcc-internal-format
-#| msgid "initializer-string for %qT is too long"
+#, gcc-internal-format
 msgid "initializer-string for array of %qT is too long for C++ (%wu chars into 
%wu available)"
-msgstr "initierarsträng för %qT är för lång"
+msgstr "initierarsträng för vektor av %qT är för lång för C++ (%wu tecken in i 
%wu tillgängliga)"
 
 #: c/c-typeck.cc:9392
 #, gcc-internal-format
@@ -50988,10 +50970,9 @@ msgid "initializer element is not a constant 
expression"
 msgstr "initierarelement är inte ett konstant uttryck"
 
 #: c/c-typeck.cc:9560 cp/typeck2.cc:1340
-#, fuzzy, gcc-internal-format
-#| msgid "conversion f

[gcc r14-11538] LoongArch: Add LoongArch architecture detection to __float128 support in libgfortran and libquadmath

2025-04-07 Thread LuluCheng via Gcc-cvs
https://gcc.gnu.org/g:73a62377ae638ce85c0a40327e67b41c861fb156

commit r14-11538-g73a62377ae638ce85c0a40327e67b41c861fb156
Author: Lulu Cheng 
Date:   Mon Apr 7 10:00:27 2025 +0800

LoongArch: Add LoongArch architecture detection to __float128 support in 
libgfortran and libquadmath [PR119408].

In GCC14, LoongArch added __float128 as an alias for _Float128.
Commit r15-8962 added support for q/Q suffixes for 128-bit floating-point
numbers.  This will cause the compiler to automatically link libquadmath
when compiling Fortran programs.  But on LoongArch `long double` is
IEEE quad, so there is no need to implement libquadmath.
This causes link failure.

PR target/119408

libgfortran/ChangeLog:

* acinclude.m4: When checking for __float128 support, determine
whether the current architecture is LoongArch.  If so, return false.
* configure: Regenerate.

libquadmath/ChangeLog:

* configure.ac: When checking for __float128 support, determine
whether the current architecture is LoongArch.  If so, return false.
* configure: Regenerate.

Signed-off-by: Xi Ruoyao 
Signed-off-by: Jakub Jelinek 

(cherry picked from commit 1534f0099c98ea14c08a401302b05edf2231f411)

Diff:
---
 libgfortran/acinclude.m4 | 4 ++++
 libgfortran/configure| 8 ++++++++
 libquadmath/configure| 8 ++++++++
 libquadmath/configure.ac | 4 ++++
 4 files changed, 24 insertions(+)

diff --git a/libgfortran/acinclude.m4 b/libgfortran/acinclude.m4
index a73207e54656..23fd621e5188 100644
--- a/libgfortran/acinclude.m4
+++ b/libgfortran/acinclude.m4
@@ -274,6 +274,10 @@ AC_DEFUN([LIBGFOR_CHECK_FLOAT128], [
   AC_CACHE_CHECK([whether we have a usable _Float128 type],
  libgfor_cv_have_float128, [
GCC_TRY_COMPILE_OR_LINK([
+#ifdef __loongarch__
+#error On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 _Float128 foo (_Float128 x)
 {
  _Complex _Float128 z1, z2;
diff --git a/libgfortran/configure b/libgfortran/configure
index 774dd52fc95d..16bad2924749 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -30288,6 +30288,10 @@ else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
+#ifdef __loongarch__
+#error On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 _Float128 foo (_Float128 x)
 {
  _Complex _Float128 z1, z2;
@@ -30341,6 +30345,10 @@ fi
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
+#ifdef __loongarch__
+#error On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 _Float128 foo (_Float128 x)
 {
  _Complex _Float128 z1, z2;
diff --git a/libquadmath/configure b/libquadmath/configure
index 49d70809218c..f82dd3d0d6d4 100755
--- a/libquadmath/configure
+++ b/libquadmath/configure
@@ -12843,6 +12843,10 @@ else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
+#ifdef __loongarch__
+#error  On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
 typedef _Complex float __attribute__((mode(TC))) __complex128;
 #else
@@ -12894,6 +12898,10 @@ fi
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
+#ifdef __loongarch__
+#error  On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
 typedef _Complex float __attribute__((mode(TC))) __complex128;
 #else
diff --git a/libquadmath/configure.ac b/libquadmath/configure.ac
index 349be2607c64..c64a84892191 100644
--- a/libquadmath/configure.ac
+++ b/libquadmath/configure.ac
@@ -233,6 +233,10 @@ AM_CONDITIONAL(LIBQUAD_USE_SYMVER_SUN, [test 
"x$quadmath_use_symver" = xsun])
 
 AC_CACHE_CHECK([whether __float128 is supported], [libquad_cv_have_float128],
   [GCC_TRY_COMPILE_OR_LINK([
+#ifdef __loongarch__
+#error  On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
 typedef _Complex float __attribute__((mode(TC))) __complex128;
 #else


[gcc r15-9268] gomp: Various fixes for SVE types [PR101018]

2025-04-07 Thread Tejas Belagod via Gcc-cvs
https://gcc.gnu.org/g:001fb23ae46ba4bd1b5e43f756fa89e6fb94ce18

commit r15-9268-g001fb23ae46ba4bd1b5e43f756fa89e6fb94ce18
Author: Richard Sandiford 
Date:   Fri May 12 10:33:25 2023 +0100

gomp: Various fixes for SVE types [PR101018]

Various parts of the omp code checked whether the size of a decl
was an INTEGER_CST in order to determine whether the decl was
variable-sized or not.  If it was variable-sized, it was expected
to have a DECL_VALUE_EXPR replacement, as for VLAs.

This patch uses poly_int_tree_p instead, so that variable-length
SVE vectors are treated like constant-length vectors.  This means
that some structures become poly_int-sized, with some fields at
poly_int offsets, but we already have code to handle that.

An alternative would have been to handle the data via indirection
instead.  However, that's likely to be more complicated, and it
would contradict is_variable_sized, which already uses a check
for TREE_CONSTANT rather than INTEGER_CST.

gimple_add_tmp_var should probably not add a safelen of 1
for SVE vectors, but that's really a separate thing and might
be hard to test.

Co-authored-by: Tejas Belagod 

gcc/
PR middle-end/101018
* poly-int.h (can_and_p): New function.
* fold-const.cc (poly_int_binop): Use it to optimize BIT_AND_EXPRs
involving POLY_INT_CSTs.
* gimplify.cc (omp_notice_variable): Use poly_int_tree_p instead
of INTEGER_CST when checking for constant-sized omp data.
(gimplify_adjust_omp_clauses_1): Likewise.
(gimplify_adjust_omp_clauses): Likewise.
* omp-low.cc (scan_sharing_clauses): Likewise.

Diff:
---
 gcc/fold-const.cc |  7 +++
 gcc/gimplify.cc   | 19 +--
 gcc/omp-low.cc|  2 +-
 gcc/poly-int.h| 19 +++
 4 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 3e20538de9fd..1275ef75315a 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -1284,6 +1284,13 @@ poly_int_binop (poly_wide_int &res, enum tree_code code,
return false;
   break;
 
+case BIT_AND_EXPR:
+  if (TREE_CODE (arg2) != INTEGER_CST
+ || !can_and_p (wi::to_poly_wide (arg1), wi::to_wide (arg2),
+&res))
+   return false;
+  break;
+
 case BIT_IOR_EXPR:
   if (TREE_CODE (arg2) != INTEGER_CST
  || !can_ior_p (wi::to_poly_wide (arg1), wi::to_wide (arg2),
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index e90220cc2a05..55cab7a74a88 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -9301,7 +9301,8 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree 
decl, bool in_code)
   && (flags & (GOVD_SEEN | GOVD_LOCAL)) == GOVD_SEEN
   && DECL_SIZE (decl))
 {
-  if (TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+  tree size;
+  if (!poly_int_tree_p (DECL_SIZE (decl)))
{
  splay_tree_node n2;
  tree t = DECL_VALUE_EXPR (decl);
@@ -9312,16 +9313,14 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree 
decl, bool in_code)
  n2->value |= GOVD_SEEN;
}
   else if (omp_privatize_by_reference (decl)
-  && TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl)))
-  && (TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl))))
-  != INTEGER_CST))
+  && (size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl))))
+  && !poly_int_tree_p (size))
{
  splay_tree_node n2;
- tree t = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl)));
- gcc_assert (DECL_P (t));
- n2 = splay_tree_lookup (ctx->variables, (splay_tree_key) t);
+ gcc_assert (DECL_P (size));
+ n2 = splay_tree_lookup (ctx->variables, (splay_tree_key) size);
  if (n2)
-   omp_notice_variable (ctx, t, true);
+   omp_notice_variable (ctx, size, true);
}
 }
 
@@ -14581,7 +14580,7 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void 
*data)
   if ((gimplify_omp_ctxp->region_type & ORT_ACC) == 0)
OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (clause) = 1;
   if (DECL_SIZE (decl)
- && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+ && !poly_int_tree_p (DECL_SIZE (decl)))
{
  tree decl2 = DECL_VALUE_EXPR (decl);
  gcc_assert (INDIRECT_REF_P (decl2));
@@ -15322,7 +15321,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, 
gimple_seq body, tree *list_p,
  if (!DECL_P (decl))
break;
  if (DECL_SIZE (decl)
- && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+ && !poly_int_tree_p (DECL_SIZE (decl)))
{
  tree decl2 = DECL_VALUE_EXPR (decl);
  gcc_assert (INDIRECT_REF_P (decl2));
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index e369df6e8f10..e1036adab288 100644
--- a/gc

[gcc r15-9269] Add function to strip pointer type and get down to the actual pointee type.

2025-04-07 Thread Tejas Belagod via Gcc-cvs
https://gcc.gnu.org/g:7058b2d0c9d2240c4b30c97cfa4cae33e2d845cc

commit r15-9269-g7058b2d0c9d2240c4b30c97cfa4cae33e2d845cc
Author: Tejas Belagod 
Date:   Sun Oct 13 15:50:23 2024 +0530

Add function to strip pointer type and get down to the actual pointee type.

Add a function to traverse down the pointer layers to the pointee type.

gcc/ChangeLog:
* tree.h (strip_pointer_types): New.

Diff:
---
 gcc/tree.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/gcc/tree.h b/gcc/tree.h
index 55f97f9f9994..99f261776281 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -5053,6 +5053,17 @@ strip_array_types (tree type)
   return type;
 }
 
+/* Recursively traverse down pointer type layers to pointee type.  */
+
+inline const_tree
+strip_pointer_types (const_tree type)
+{
+  while (POINTER_TYPE_P (type))
+type = TREE_TYPE (type);
+
+  return type;
+}
+
 /* Desription of the reason why the argument of valid_constant_size_p
is not a valid size.  */
 enum cst_size_error {


[gcc r15-9266] RISC-V: Disable unsupported vsext/vzext patterns for XTheadVector.

2025-04-07 Thread Ma Jin via Gcc-cvs
https://gcc.gnu.org/g:196b45caca0aae57a95bffcdd5c188994317de08

commit r15-9266-g196b45caca0aae57a95bffcdd5c188994317de08
Author: Jin Ma 
Date:   Mon Apr 7 14:21:50 2025 +0800

RISC-V: Disable unsupported vsext/vzext patterns for XTheadVector.

XTheadVector does not support the vsext/vzext instructions; however,
due to the reuse of RVV optimizations, it may generate these instructions
in certain cases. To prevent the error "Unknown opcode 'th.vsext.vf2',"
we should disable these patterns.

V2:
Change the value of dg-do in the test case from assemble to compile, and
remove the -save-temps option.

gcc/ChangeLog:

* config/riscv/vector.md: Disable vsext/vzext for XTheadVector.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/xtheadvector/vsext.c: New test.
* gcc.target/riscv/rvv/xtheadvector/vzext.c: New test.

Diff:
---
 gcc/config/riscv/vector.md |  6 +++---
 .../gcc.target/riscv/rvv/xtheadvector/vsext.c  | 24 ++
 .../gcc.target/riscv/rvv/xtheadvector/vzext.c  | 24 ++
 3 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 8ee43cf0ce1c..51eb64fb1226 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3939,7 +3939,7 @@
  (any_extend:VWEXTI
(match_operand: 3 "register_operand" "   vr,   vr"))
  (match_operand:VWEXTI 2 "vector_merge_operand" "   vu,
0")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && !TARGET_XTHEADVECTOR"
   "vext.vf2\t%0,%3%p1"
   [(set_attr "type" "vext")
(set_attr "mode" "")])
@@ -3959,7 +3959,7 @@
  (any_extend:VQEXTI
(match_operand: 3 "register_operand" "   vr,   vr"))
  (match_operand:VQEXTI 2 "vector_merge_operand"   "   vu,0")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && !TARGET_XTHEADVECTOR"
   "vext.vf4\t%0,%3%p1"
   [(set_attr "type" "vext")
(set_attr "mode" "")])
@@ -3979,7 +3979,7 @@
  (any_extend:VOEXTI
(match_operand: 3 "register_operand" "   vr,   vr"))
  (match_operand:VOEXTI 2 "vector_merge_operand"  "   vu,0")))]
-  "TARGET_VECTOR"
+  "TARGET_VECTOR && !TARGET_XTHEADVECTOR"
   "vext.vf8\t%0,%3%p1"
   [(set_attr "type" "vext")
(set_attr "mode" "")])
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vsext.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vsext.c
new file mode 100644
index ..55db28304c7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vsext.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { rv64 } } } */
+/* { dg-options "-march=rv64gc_xtheadvector -mabi=lp64d -O3" } */
+
+#include 
+
+struct a
+{
+  int b[];
+} c (vint32m4_t), d;
+
+char e;
+char *f;
+
+void g ()
+{
+  int h;
+  vint32m4_t i;
+  vint8m1_t j = __riscv_vlse8_v_i8m1 (&e, d.b[3], h);
+  vint16m2_t k = __riscv_vwadd_vx_i16m2 (j, 0, h);
+  i = __riscv_vwmacc_vx_i32m4 (i, f[0], k, h);
+  c (i);
+}
+
+/* { dg-final { scan-assembler-not {th\.vsext\.vf2} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vzext.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vzext.c
new file mode 100644
index ..fcb565991c6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vzext.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { rv64 } } } */
+/* { dg-options "-march=rv64gc_xtheadvector -mabi=lp64d -O3" } */
+
+#include 
+
+struct a
+{
+  int b[];
+} c (vuint32m4_t), d;
+
+char e;
+char *f;
+
+void g ()
+{
+  int h;
+  vuint32m4_t i;
+  vuint8m1_t j = __riscv_vlse8_v_u8m1 (&e, d.b[3], h);
+  vuint16m2_t k = __riscv_vwaddu_vx_u16m2 (j, 0, h);
+  i = __riscv_vwmaccu_vx_u32m4 (i, f[0], k, h);
+  c (i);
+}
+
+/* { dg-final { scan-assembler-not {th\.vzext\.vf2} } } */


[gcc r14-11536] libstdc++: Work around C++20 tuple<tuple<any>> constraint recursion [PR116440]

2025-04-07 Thread Patrick Palka via Libstdc++-cvs
https://gcc.gnu.org/g:5cdf31f08074d5ed8d07e93390de573938ad1eb3

commit r14-11536-g5cdf31f08074d5ed8d07e93390de573938ad1eb3
Author: Patrick Palka 
Date:   Thu Mar 13 19:55:00 2025 -0400

libstdc++: Work around C++20 tuple<tuple<any>> constraint recursion
[PR116440]

The type tuple<tuple<any>> is clearly copy/move constructible, but for
reasons that are not yet completely understood checking this triggers
constraint recursion with our C++20 tuple implementation (but not the
C++17 implementation).

It turns out this recursion stems from considering the non-template
tuple(const _Elements&) constructor during the copy/move constructibility
check.  Considering this constructor is ultimately redundant, since the
defaulted copy/move constructors are better matches.

GCC has a non-standard "perfect candidate" optimization[1] that causes
overload resolution to shortcut considering template candidates if we
find a (non-template) perfect candidate.  So to work around this issue
(and as a general compile-time optimization) this patch turns the
problematic constructor into a template so that GCC doesn't consider it
when checking for copy/move constructibility of this tuple type.

Changing the template-ness of a constructor can affect overload
resolution (since template-ness is a tiebreaker) so there's a risk this
change could e.g. introduce overload resolution ambiguities.  But the
original C++17 implementation has long defined this constructor as a
template (in order to constrain it etc), so doing the same thing in the
C++20 mode should naturally be quite safe.

The testcase still fails with Clang (in C++20 mode) since it doesn't
implement said optimization.

[1]: See r11-7287-g187d0d5871b1fa and
https://isocpp.org/files/papers/P3606R0.html

PR libstdc++/116440

libstdc++-v3/ChangeLog:

* include/std/tuple (tuple::tuple(const _Elements&...))
[C++20]: Turn into a template.
* testsuite/20_util/tuple/116440.C: New test.

Reviewed-by: Jonathan Wakely 
(cherry picked from commit 6570fa6f2612a4e4ddd2fcfc119369a1a48656e4)

Diff:
---
 libstdc++-v3/include/std/tuple| 14 +++--
 libstdc++-v3/testsuite/20_util/tuple/116440.C | 29 +++
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/std/tuple b/libstdc++-v3/include/std/tuple
index 85a380842b6e..c46665056818 100644
--- a/libstdc++-v3/include/std/tuple
+++ b/libstdc++-v3/include/std/tuple
@@ -969,12 +969,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   : _Inherited()
   { }
 
-  constexpr explicit(!__convertible())
-  tuple(const _Elements&... __elements)
-  noexcept(__nothrow_constructible())
-  requires (__constructible())
-  : _Inherited(__elements...)
-  { }
+  // Defined as a template to work around PR libstdc++/116440.
+  template
+   constexpr explicit(!__convertible())
+   tuple(const _Elements&... __elements)
+   noexcept(__nothrow_constructible())
+   requires (__constructible())
+   : _Inherited(__elements...)
+   { }
 
   template
requires (__disambiguating_constraint<_UTypes...>())
diff --git a/libstdc++-v3/testsuite/20_util/tuple/116440.C 
b/libstdc++-v3/testsuite/20_util/tuple/116440.C
new file mode 100644
index ..12259134d251
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/tuple/116440.C
@@ -0,0 +1,29 @@
+// PR libstdc++/116440 - std::tuple<std::tuple<std::any>> does not compile
+// { dg-do compile { target c++17 } }
+
+#include <any>
+#include <tuple>
+#include <type_traits>
+
+template <typename T>
+using TupleTuple = std::tuple<std::tuple<T>>;
+
+struct EmbedAny {
+std::any content;
+};
+
+static_assert(std::is_copy_constructible>::value);
+static_assert(std::is_move_constructible>::value);
+
+static_assert(std::is_copy_constructible>::value);
+static_assert(std::is_move_constructible>::value);
+
+static_assert(std::is_constructible_v>);
+
+struct EmbedAnyWithZeroSizeArray {
+void* pad[0];
+std::any content;
+};
+
+static_assert(std::is_copy_constructible>::value);
+static_assert(std::is_move_constructible>::value);


[gcc r15-9267] i386: Add PTA_AVX10_1_256 to PTA_DIAMONDRAPIDS

2025-04-07 Thread Haochen Jiang via Gcc-cvs
https://gcc.gnu.org/g:2b809d6f4fb62aeccf482a889bc2775e5d94ef80

commit r15-9267-g2b809d6f4fb62aeccf482a889bc2775e5d94ef80
Author: Haochen Jiang 
Date:   Fri Mar 28 16:16:27 2025 +0800

i386: Add PTA_AVX10_1_256 to PTA_DIAMONDRAPIDS

For -march= handling, PTA_AVX10_1 will not imply PTA_AVX10_1_256,
resulting in TARGET_AVX10_1 becoming true while TARGET_AVX10_1_256
false. Since we will check TARGET_AVX10_1_256 in GCC 15 for AVX512
feature enabling for AVX10, -march=diamondrapids will not enable
512 bit register and x/ymm16+.

Since AVX10 will get a further cleanup in GCC 16, which will let
PTA_DIAMONDRAPIDS reuse PTA_GRANITERAPIDS_D and make the implication
obvious again, I plan not to add a testcase but just to fix the issue
in GCC 15.

gcc/ChangeLog:

* config/i386/i386.h (PTA_DIAMONDRAPIDS): Add PTA_AVX10_1_256.

Diff:
---
 gcc/config/i386/i386.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 13da3d888d4e..8507243d726b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2449,11 +2449,11 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = 
PTA_SKYLAKE | PTA_PKU | PTA_SHA
   | PTA_WBNOINVD | PTA_CLWB | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_ENQCMD
   | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK
   | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI
-  | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1
-  | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 | PTA_AVXVNNIINT8
-  | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2
-  | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_AMX_TRANSPOSE
-  | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
+  | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1_256
+  | PTA_AVX10_1 | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16
+  | PTA_AVXVNNIINT8 | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4
+  | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32
+  | PTA_AMX_TRANSPOSE | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
 
 constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
   | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3


[gcc r15-9247] testsuite: arm: Tighten compile options for short-vfp-1.c [PR119556]

2025-04-07 Thread Christophe Lyon via Gcc-cvs
https://gcc.gnu.org/g:2a155fab5a015ed4500474bab9b6ce0c4bd6c52e

commit r15-9247-g2a155fab5a015ed4500474bab9b6ce0c4bd6c52e
Author: Christophe Lyon 
Date:   Thu Apr 3 18:45:51 2025 +

testsuite: arm: Tighten compile options for short-vfp-1.c [PR119556]

The previous version of this test required arch v6+ (for sxth), and
the number of vmov depended on the floating-point ABI (where softfp
needed more of them to transfer floating-point values to and from
general registers).

With this patch we require arch v7-a, vfp FPU and -mfloat-abi=hard, we
also use -O2 to clean the generated code and convert
scan-assembler-times directives into check-function-bodies.

Tested on arm-none-linux-gnueabihf and several flavours of
arm-none-eabi.

gcc/testsuite/ChangeLog:

PR target/119556
* gcc.target/arm/short-vfp-1.c: Improve dg directives.

Diff:
---
 gcc/testsuite/gcc.target/arm/short-vfp-1.c | 46 --
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/short-vfp-1.c 
b/gcc/testsuite/gcc.target/arm/short-vfp-1.c
index f6866c4f6012..418fc279af05 100644
--- a/gcc/testsuite/gcc.target/arm/short-vfp-1.c
+++ b/gcc/testsuite/gcc.target/arm/short-vfp-1.c
@@ -1,45 +1,75 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target arm_vfp_ok } */
-/* { dg-add-options arm_vfp } */
+/* { dg-require-effective-target arm_arch_v7a_fp_hard_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_arch_v7a_fp_hard } */
+/* { dg-final { check-function-bodies "**" "" } } */
 
+/*
+** test_sisf:
+** vcvt.s32.f32(s[0-9]+), s0
+** vmovr0, \1  @ int
+** bx  lr
+*/
 int
 test_sisf (float x)
 {
   return (int)x;
 }
 
+/*
+** test_hisf:
+** vcvt.s32.f32(s[0-9]+), s0
+** vmov(r[0-9]+), \1   @ int
+** sxthr0, \2
+** bx  lr
+*/
 short
 test_hisf (float x)
 {
   return (short)x;
 }
 
+/*
+** test_sfsi:
+** vmov(s[0-9]+), r0   @ int
+** vcvt.f32.s32s0, \1
+** bx  lr
+*/
 float
 test_sfsi (int x)
 {
   return (float)x;
 }
 
+/*
+** test_sfhi:
+** vmov(s[0-9]+), r0   @ int
+** vcvt.f32.s32s0, \1
+** bx  lr
+*/
 float
 test_sfhi (short x)
 {
   return (float)x;
 }
 
+/*
+** test_hisi:
+** sxthr0, r0
+** bx  lr
+*/
 short
 test_hisi (int x)
 {
   return (short)x;
 }
 
+/*
+** test_sihi:
+** bx  lr
+*/
 int
 test_sihi (short x)
 {
   return (int)x;
 }
-
-/* { dg-final { scan-assembler-times {vcvt\.s32\.f32\ts[0-9]+, s[0-9]+} 2 } } 
*/
-/* { dg-final { scan-assembler-times {vcvt\.f32\.s32\ts[0-9]+, s[0-9]+} 2 } } 
*/
-/* { dg-final { scan-assembler-times {vmov\tr[0-9]+, s[0-9]+} 2 } } */
-/* { dg-final { scan-assembler-times {vmov\ts[0-9]+, r[0-9]+} 2 } } */
-/* { dg-final { scan-assembler-times {sxth\tr[0-9]+, r[0-9]+} 2 } } */


[gcc(refs/users/alfierichards/heads/fmv_c)] Change target_version semantics to follow ACLE specification.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:9a206e0522b42342b7356369d0d77f28c044f091

commit 9a206e0522b42342b7356369d0d77f28c044f091
Author: Alfie Richards 
Date:   Thu Feb 13 15:30:45 2025 +

Change target_version semantics to follow ACLE specification.

This changes behavior of target_clones and target_version attributes
to be in line with what is specified in the Arm C Language Extensions.

Notably this changes the scope and signature of multiversioned functions
to that of the default version, and changes the resolver to be
created at the implementation of the default version.

This is achieved by changing the C++ front end to no longer resolve any
non-default version decls in lookup, and by moving dispatching
for default_target sets to reuse the dispatching logic for target_clones
in multiple_target.cc.

The dispatching in create_dispatcher_calls is changed for the case of
a lone annotated default function to change the dispatched symbol to
be an alias for the mangled default function.

gcc/ChangeLog:

* cgraphunit.cc (analyze_functions): Add logic for target version
dependencies.
* ipa.cc (symbol_table::remove_unreachable_nodes): Ditto.
* multiple_target.cc (create_dispatcher_calls): Change to support
target version semantics.
(ipa_target_clone): Change to dispatch all function sets in
target_version semantics.

gcc/cp/ChangeLog:

* call.cc (add_candidates): Change to not resolve non-default 
versions in
target_version semantics.
* class.cc (resolve_address_of_overloaded_function): Ditto.
* cp-gimplify.cc (cp_genericize_r): Change logic to not apply for
target_version semantics.
* decl.cc (start_decl): Change to mark and therefore mangle all
target_version decls.
(start_preparsed_function): Ditto.
* typeck.cc (cp_build_function_call_vec): Add error for calling 
unresolvable
non-default node in target_version semantics.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-1.C: Change for target_version semantics.
* g++.target/aarch64/mv-symbols2.C: Ditto.
* g++.target/aarch64/mv-symbols3.C: Ditto.
* g++.target/aarch64/mv-symbols4.C: Ditto.
* g++.target/aarch64/mv-symbols5.C: Ditto.
* g++.target/aarch64/mvc-symbols3.C: Ditto.
* g++.target/riscv/mv-symbols2.C: Ditto.
* g++.target/riscv/mv-symbols3.C: Ditto.
* g++.target/riscv/mv-symbols4.C: Ditto.
* g++.target/riscv/mv-symbols5.C: Ditto.
* g++.target/riscv/mvc-symbols3.C: Ditto.
* g++.target/aarch64/mv-symbols10.C: New test.
* g++.target/aarch64/mv-symbols11.C: New test.
* g++.target/aarch64/mv-symbols12.C: New test.
* g++.target/aarch64/mv-symbols13.C: New test.
* g++.target/aarch64/mv-symbols6.C: New test.
* g++.target/aarch64/mv-symbols7.C: New test.
* g++.target/aarch64/mv-symbols8.C: New test.
* g++.target/aarch64/mv-symbols9.C: New test.

Diff:
---
 gcc/cgraphunit.cc   |  9 +++
 gcc/cp/call.cc  | 10 
 gcc/cp/class.cc | 13 -
 gcc/cp/cp-gimplify.cc   | 11 ++--
 gcc/cp/decl.cc  | 14 +
 gcc/cp/typeck.cc| 10 
 gcc/ipa.cc  | 11 
 gcc/multiple_target.cc  | 73 +
 gcc/testsuite/g++.target/aarch64/mv-1.C |  4 ++
 gcc/testsuite/g++.target/aarch64/mv-symbols10.C | 27 +
 gcc/testsuite/g++.target/aarch64/mv-symbols11.C | 30 ++
 gcc/testsuite/g++.target/aarch64/mv-symbols12.C | 28 ++
 gcc/testsuite/g++.target/aarch64/mv-symbols13.C | 28 ++
 gcc/testsuite/g++.target/aarch64/mv-symbols2.C  | 12 ++--
 gcc/testsuite/g++.target/aarch64/mv-symbols3.C  |  6 +-
 gcc/testsuite/g++.target/aarch64/mv-symbols4.C  |  6 +-
 gcc/testsuite/g++.target/aarch64/mv-symbols5.C  |  6 +-
 gcc/testsuite/g++.target/aarch64/mv-symbols6.C  | 25 +
 gcc/testsuite/g++.target/aarch64/mv-symbols7.C  | 48 
 gcc/testsuite/g++.target/aarch64/mv-symbols8.C  | 46 
 gcc/testsuite/g++.target/aarch64/mv-symbols9.C  | 43 +++
 gcc/testsuite/g++.target/aarch64/mvc-symbols3.C | 12 ++--
 gcc/testsuite/g++.target/riscv/mv-symbols2.C| 12 ++--
 gcc/testsuite/g++.target/riscv/mv-symbols3.C|  6 +-
 gcc/testsuite/g++.target/riscv/mv-symbols4.C|  6 +-
 gcc/testsuite/g++.target/riscv/mv-symbols5.C|  6 +-
 gcc/testsuite/g++.target/riscv/mvc-symbols3.C   | 12 ++--
 27 files changed, 456 insertions(+), 58 deletions(-)

diff --git

[gcc(refs/users/alfierichards/heads/fmv_c)] Refactor FMV name mangling.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:fcce1c2be2d17cb138ec6869be319da39b51c7a8

commit fcce1c2be2d17cb138ec6869be319da39b51c7a8
Author: Alfie Richards 
Date:   Wed Feb 12 14:13:02 2025 +

Refactor FMV name mangling.

This patch is an overhaul of how FMV name mangling works. Previously
mangling logic was duplicated in several places across both target
specific and independent code. This patch changes this such that all
mangling is done in targetm.mangle_decl_assembler_name (including for the
dispatched symbol and dispatcher resolver).

This allows for the removing of previous hacks, such as where the default
mangled decl's assembler name was unmangled to then remangle all versions
and the resolver and dispatched symbol.

This does introduce a change though (shown in test changes) where
previously x86 for target annotated FMV sets set the function name to
the assembler name and remangled this. This was hard to reproduce without
resorting to hacks I wasn't comfortable with so the mangling is changed
to append ".ifunc" which matches clang.

This change also refactors expand_target_clone using
targetm.mangle_decl_assembler_name for mangling and get_clone_versions.

gcc/ChangeLog:

* attribs.cc (make_dispatcher_decl): Move duplicated cgraph logic 
into
this function and change to use targetm.mangle_decl_assembler_name 
for
mangling.
* config/aarch64/aarch64.cc (aarch64_parse_fmv_features): Change to
support string_slice.
(aarch64_process_target_version_attr): Ditto.
(get_feature_mask_for_version): Ditto.
(aarch64_mangle_decl_assembler_name): Add logic for mangling 
dispatched
symbol and resolver.
(get_suffixed_assembler_name): Removed.
(make_resolver_func): Refactor to use
aarch64_mangle_decl_assembler_name for mangling.
(aarch64_generate_version_dispatcher_body): Remove remangling.
(aarch64_get_function_versions_dispatcher): Refactor to remove
duplicated cgraph logic.
* config/i386/i386-features.cc (is_valid_asm_symbol): Moved from
multiple_target.cc.
(create_new_asm_name): Ditto.
(ix86_mangle_function_version_assembler_name): Refactor to use
clone_identifier and to mangle default.
(ix86_mangle_decl_assembler_name): Add logic for mangling dispatched
symbol and resolver.
(ix86_get_function_versions_dispatcher): Remove duplicated cgraph
logic.
(make_resolver_func): Refactor to use 
ix86_mangle_decl_assembler_name
for mangling.
* config/riscv/riscv.cc (riscv_mangle_decl_assembler_name): Add 
logic
for FMV mangling.
(get_suffixed_assembler_name): Removed.
(make_resolver_func): Refactor to use 
riscv_mangle_decl_assembler_name
for mangling.
(riscv_generate_version_dispatcher_body): Remove unnecessary 
remangling.
(riscv_get_function_versions_dispatcher): Remove duplicated cgraph
logic.
* config/rs6000/rs6000.cc (rs6000_mangle_decl_assembler_name): New
function.
(rs6000_get_function_versions_dispatcher): Remove duplicated cgraph
logic.
(make_resolver_func): Refactor to use 
rs6000_mangle_decl_assembler_name
for mangling.
(is_valid_asm_symbol): Move from multiple_target.cc.
(create_new_asm_name): Ditto.
(rs6000_mangle_function_version_assembler_name): New function.
* multiple_target.cc (create_dispatcher_calls): Remove mangling 
code.
(get_attr_str): Removed.
(separate_attrs): Ditto.
(is_valid_asm_symbol): Moved to target specific.
(create_new_asm_name): Ditto.
(expand_target_clones): Refactor to use
targetm.mangle_decl_assembler_name for mangling and be more general.
* tree.cc (get_target_clone_attr_len): Removed.
* tree.h (get_target_clone_attr_len): Removed.

gcc/cp/ChangeLog:

* decl.cc (maybe_mark_function_versioned): Change to insert 
function version
and therefore record assembler name.

gcc/testsuite/ChangeLog:

* g++.target/i386/mv-symbols1.C: Update x86 FMV mangling.
* g++.target/i386/mv-symbols3.C: Ditto.
* g++.target/i386/mv-symbols4.C: Ditto.
* g++.target/i386/mv-symbols5.C: Ditto.

Diff:
---
 gcc/attribs.cc  |  44 --
 gcc/config/aarch64/aarch64.cc   | 160 -
 gcc/config/i386/i386-features.cc| 108 +-
 gcc/config/riscv/riscv.cc   | 101 +
 gcc/config/rs6000/rs6000.cc | 115 ---
 gcc/

[gcc(refs/users/alfierichards/heads/fmv_c)] Update is_function_default_version to work with target_version.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:4a1e6acecf55e8d4b008fdf4efba6160e9cd1108

commit 4a1e6acecf55e8d4b008fdf4efba6160e9cd1108
Author: Alfie Richards 
Date:   Fri Jan 31 10:47:14 2025 +

Update is_function_default_version to work with target_version.

Notably this respects target_version semantics where an unannotated
function can be the default version.

gcc/ChangeLog:

* attribs.cc (is_function_default_version): Add target_version 
logic.

Diff:
---
 gcc/attribs.cc | 27 ---
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index 56dd18c2fa8e..f6667839c013 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -1279,18 +1279,31 @@ make_dispatcher_decl (const tree decl)
   return func_decl;
 }
 
-/* Returns true if DECL is multi-versioned using the target attribute, and this
-   is the default version.  This function can only be used for targets that do
-   not support the "target_version" attribute.  */
+/* Returns true if DECL is a multiversioned default.
+   With the target attribute semantics, returns true if the function is marked
+   as default with the target version.
+   With the target_version attribute semantics, returns true if the function
+   is either not annotated, or annotated as default.  */
 
 bool
 is_function_default_version (const tree decl)
 {
-  if (TREE_CODE (decl) != FUNCTION_DECL
-  || !DECL_FUNCTION_VERSIONED (decl))
+  tree attr;
+  if (TREE_CODE (decl) != FUNCTION_DECL)
 return false;
-  tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
-  gcc_assert (attr);
+  if (TARGET_HAS_FMV_TARGET_ATTRIBUTE)
+{
+  if (!DECL_FUNCTION_VERSIONED (decl))
+   return false;
+  attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
+  gcc_assert (attr);
+}
+  else
+{
+  attr = lookup_attribute ("target_version", DECL_ATTRIBUTES (decl));
+  if (!attr)
+   return true;
+}
   attr = TREE_VALUE (TREE_VALUE (attr));
   return (TREE_CODE (attr) == STRING_CST
  && strcmp (TREE_STRING_POINTER (attr), "default") == 0);


[gcc(refs/users/alfierichards/heads/fmv_c)] Remove FMV beta warning.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:9f857996ad74fabf86f174b5cc6b8052d7131fa8

commit 9f857996ad74fabf86f174b5cc6b8052d7131fa8
Author: Alfie Richards 
Date:   Thu Feb 13 16:52:28 2025 +

Remove FMV beta warning.

This patch removes the warning for target_version and target_clones
in aarch64 as it is now spec compliant.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_process_target_version_attr):
Remove warning.
* config/aarch64/aarch64.opt: Mark -Wno-experimental-fmv-target
deprecated.
* doc/invoke.texi: Ditto.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-1.C: Remove option.
* g++.target/aarch64/mv-and-mvc-error1.C: Ditto.
* g++.target/aarch64/mv-and-mvc-error2.C: Ditto.
* g++.target/aarch64/mv-and-mvc-error3.C: Ditto.
* g++.target/aarch64/mv-and-mvc1.C: Ditto.
* g++.target/aarch64/mv-and-mvc2.C: Ditto.
* g++.target/aarch64/mv-and-mvc3.C: Ditto.
* g++.target/aarch64/mv-and-mvc4.C: Ditto.
* g++.target/aarch64/mv-error1.C: Ditto.
* g++.target/aarch64/mv-error2.C: Ditto.
* g++.target/aarch64/mv-error3.C: Ditto.
* g++.target/aarch64/mv-error4.C: Ditto.
* g++.target/aarch64/mv-error5.C: Ditto.
* g++.target/aarch64/mv-error6.C: Ditto.
* g++.target/aarch64/mv-error7.C: Ditto.
* g++.target/aarch64/mv-error8.C: Ditto.
* g++.target/aarch64/mv-pragma.C: Ditto.
* g++.target/aarch64/mv-symbols1.C: Ditto.
* g++.target/aarch64/mv-symbols10.C: Ditto.
* g++.target/aarch64/mv-symbols11.C: Ditto.
* g++.target/aarch64/mv-symbols12.C: Ditto.
* g++.target/aarch64/mv-symbols13.C: Ditto.
* g++.target/aarch64/mv-symbols2.C: Ditto.
* g++.target/aarch64/mv-symbols3.C: Ditto.
* g++.target/aarch64/mv-symbols4.C: Ditto.
* g++.target/aarch64/mv-symbols5.C: Ditto.
* g++.target/aarch64/mv-symbols6.C: Ditto.
* g++.target/aarch64/mv-symbols7.C: Ditto.
* g++.target/aarch64/mv-symbols8.C: Ditto.
* g++.target/aarch64/mv-symbols9.C: Ditto.
* g++.target/aarch64/mvc-error1.C: Ditto.
* g++.target/aarch64/mvc-error2.C: Ditto.
* g++.target/aarch64/mvc-symbols1.C: Ditto.
* g++.target/aarch64/mvc-symbols2.C: Ditto.
* g++.target/aarch64/mvc-symbols3.C: Ditto.
* g++.target/aarch64/mvc-symbols4.C: Ditto.
* g++.target/aarch64/mv-warning1.C: Removed.
* g++.target/aarch64/mvc-warning1.C: Removed.

Diff:
---
 gcc/config/aarch64/aarch64.cc| 9 -
 gcc/config/aarch64/aarch64.opt   | 2 +-
 gcc/doc/invoke.texi  | 5 +
 gcc/testsuite/g++.target/aarch64/mv-1.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc-error1.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc-error2.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc-error3.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc1.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc2.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc3.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-and-mvc4.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error1.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error2.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error3.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error4.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error5.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error6.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error7.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-error8.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-pragma.C | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols1.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols10.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols11.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols12.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols13.C  | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols2.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols3.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols4.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols5.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols6.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols7.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols8.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-symbols9.C   | 1 -
 gcc/testsuite/g++.target/aarch64/mv-warning1.C   | 9 -
 gcc/testsuite/g++.target/aarch64/mvc-error1.C| 1 -
 gcc/testsuite/g++.target/aarch64/mvc-error2.C| 1 -
 gcc/testsuite/g++.target/aarch64/mvc-symbols1.C  | 1 -
 gcc/t

[gcc(refs/users/meissner/heads/work200-orig)] Add REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:c3edff28246667ccb84cbcef315833f34d765176

commit c3edff28246667ccb84cbcef315833f34d765176
Author: Michael Meissner 
Date:   Mon Apr 7 09:58:57 2025 -0400

Add REVISION.

2025-04-07  Michael Meissner  

gcc/

* REVISION: New file for branch.

Diff:
---
 gcc/REVISION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..d881cc5456e0
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work200-orig branch


[gcc] Created branch 'meissner/heads/work200-submit' in namespace 'refs/users'

2025-04-07 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work200-submit' was created in namespace 
'refs/users' pointing to:

 150f967ed97a... Add ChangeLog.meissner and REVISION.


[gcc r15-9249] tree-optimization/119640 - ICE with vectorized shift placement

2025-04-07 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a4abf0fb902816a2508dda5956f407fc27821d88

commit r15-9249-ga4abf0fb902816a2508dda5956f407fc27821d88
Author: Richard Biener 
Date:   Mon Apr 7 11:27:19 2025 +0200

tree-optimization/119640 - ICE with vectorized shift placement

When the whole shift is invariant but the shift amount needs
to be converted and a vector shift used we can mess up placement
of vector stmts because we do not make SLP scheduling aware of
the need to insert code for it.  The following mitigates this
by more conservative placement of such code in vectorizable_shift.

PR tree-optimization/119640
* tree-vect-stmts.cc (vectorizable_shift): Always insert code
for one of our SLP operands before the code for the vector
shift itself.

* gcc.dg/vect/pr119640.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr119640.c | 17 +
 gcc/tree-vect-stmts.cc   | 11 +++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr119640.c 
b/gcc/testsuite/gcc.dg/vect/pr119640.c
new file mode 100644
index ..8872817ac31c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr119640.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-funswitch-loops" } */
+
+int save, mask_nbits;
+
+void execute(long imm)
+{
+  long shift = 0;
+  int destReg[4];
+  for (unsigned i = 0; i < 4; i++)
+{
+  if (imm)
+   shift = 1ULL << mask_nbits;
+  destReg[i] = shift;
+  save = destReg[0];
+}
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 3005ae6eaaea..7f874354e75e 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6750,13 +6750,16 @@ vectorizable_shift (vec_info *vinfo,
 {
   if (was_scalar_shift_arg)
{
- /* If the argument was the same in all lanes create
-the correctly typed vector shift amount directly.  */
+ /* If the argument was the same in all lanes create the
+correctly typed vector shift amount directly.  Note
+we made SLP scheduling think we use the original scalars,
+so place the compensation code next to the shift which
+is conservative.  See PR119640 where it otherwise breaks.  */
  op1 = fold_convert (TREE_TYPE (vectype), op1);
  op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
- !loop_vinfo ? gsi : NULL);
+ gsi);
  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
-!loop_vinfo ? gsi : NULL);
+gsi);
  vec_oprnds1.create (slp_node->vec_stmts_size);
  for (k = 0; k < slp_node->vec_stmts_size; k++)
vec_oprnds1.quick_push (vec_oprnd1);


[gcc] Created branch 'meissner/heads/work200' in namespace 'refs/users'

2025-04-07 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work200' was created in namespace 'refs/users' 
pointing to:

 fdbe017bc15f... cobol: sed portability fix


[gcc(refs/users/meissner/heads/work200)] Add ChangeLog.meissner and REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:150f967ed97a0460b49915952c0fa4d5e577140f

commit 150f967ed97a0460b49915952c0fa4d5e577140f
Author: Michael Meissner 
Date:   Mon Apr 7 09:47:45 2025 -0400

Add ChangeLog.meissner and REVISION.

2025-04-07  Michael Meissner  

gcc/

* REVISION: New file for branch.
* ChangeLog.meissner: New file.

gcc/c-family/

* ChangeLog.meissner: New file.

gcc/c/

* ChangeLog.meissner: New file.

gcc/cp/

* ChangeLog.meissner: New file.

gcc/fortran/

* ChangeLog.meissner: New file.

gcc/testsuite/

* ChangeLog.meissner: New file.

libgcc/

* ChangeLog.meissner: New file.

Diff:
---
 gcc/ChangeLog.meissner   | 5 +
 gcc/REVISION | 1 +
 gcc/c-family/ChangeLog.meissner  | 5 +
 gcc/c/ChangeLog.meissner | 5 +
 gcc/cp/ChangeLog.meissner| 5 +
 gcc/fortran/ChangeLog.meissner   | 5 +
 gcc/testsuite/ChangeLog.meissner | 5 +
 libgcc/ChangeLog.meissner| 5 +
 libstdc++-v3/ChangeLog.meissner  | 5 +
 9 files changed, 41 insertions(+)

diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
new file mode 100644
index ..b7e9ab3cf61f
--- /dev/null
+++ b/gcc/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work200, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
new file mode 100644
index ..77d4655fc30a
--- /dev/null
+++ b/gcc/REVISION
@@ -0,0 +1 @@
+work200 branch
diff --git a/gcc/c-family/ChangeLog.meissner b/gcc/c-family/ChangeLog.meissner
new file mode 100644
index ..b7e9ab3cf61f
--- /dev/null
+++ b/gcc/c-family/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work200, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/c/ChangeLog.meissner b/gcc/c/ChangeLog.meissner
new file mode 100644
index ..b7e9ab3cf61f
--- /dev/null
+++ b/gcc/c/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work200, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/cp/ChangeLog.meissner b/gcc/cp/ChangeLog.meissner
new file mode 100644
index ..b7e9ab3cf61f
--- /dev/null
+++ b/gcc/cp/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work200, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/fortran/ChangeLog.meissner b/gcc/fortran/ChangeLog.meissner
new file mode 100644
index ..b7e9ab3cf61f
--- /dev/null
+++ b/gcc/fortran/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work200, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/testsuite/ChangeLog.meissner b/gcc/testsuite/ChangeLog.meissner
new file mode 100644
index ..b7e9ab3cf61f
--- /dev/null
+++ b/gcc/testsuite/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work200, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/libgcc/ChangeLog.meissner b/libgcc/ChangeLog.meissner
new file mode 100644
index ..b7e9ab3cf61f
--- /dev/null
+++ b/libgcc/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work200, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/libstdc++-v3/ChangeLog.meissner b/libstdc++-v3/ChangeLog.meissner
new file mode 100644
index ..b7e9ab3cf61f
--- /dev/null
+++ b/libstdc++-v3/ChangeLog.meissner
@@ -0,0 +1,5 @@
+ Branch work200, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch


[gcc(refs/users/meissner/heads/work200-dmf)] Add ChangeLog.dmf and update REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:ed6085210b8ac23de8489484c9ac673a790054a2

commit ed6085210b8ac23de8489484c9ac673a790054a2
Author: Michael Meissner 
Date:   Mon Apr 7 09:48:57 2025 -0400

Add ChangeLog.dmf and update REVISION.

2025-04-07  Michael Meissner  

gcc/

* ChangeLog.dmf: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.dmf | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf
new file mode 100644
index ..721145bd2251
--- /dev/null
+++ b/gcc/ChangeLog.dmf
@@ -0,0 +1,5 @@
+ Branch work200-dmf, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 77d4655fc30a..77ae8dee498a 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work200 branch
+work200-dmf branch


[gcc] Created branch 'meissner/heads/work200-libs' in namespace 'refs/users'

2025-04-07 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work200-libs' was created in namespace 'refs/users' 
pointing to:

 150f967ed97a... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work200-libs)] Add ChangeLog.libs and update REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0a2676f587c06a691f4a9f3bcfc0e55caff88ef8

commit 0a2676f587c06a691f4a9f3bcfc0e55caff88ef8
Author: Michael Meissner 
Date:   Mon Apr 7 09:52:31 2025 -0400

Add ChangeLog.libs and update REVISION.

2025-04-07  Michael Meissner  

gcc/

* ChangeLog.libs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.libs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
new file mode 100644
index ..2ff42160f2ff
--- /dev/null
+++ b/gcc/ChangeLog.libs
@@ -0,0 +1,5 @@
+ Branch work200-libs, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 77d4655fc30a..8767f6a58b46 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work200 branch
+work200-libs branch


[gcc(refs/users/meissner/heads/work200-bugs)] Add ChangeLog.bugs and update REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:4aa59ad1d243a20244adb031dfae6b380d0f2349

commit 4aa59ad1d243a20244adb031dfae6b380d0f2349
Author: Michael Meissner 
Date:   Mon Apr 7 09:51:09 2025 -0400

Add ChangeLog.bugs and update REVISION.

2025-04-07  Michael Meissner  

gcc/

* ChangeLog.bugs: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.bugs | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs
new file mode 100644
index ..86df7fca152e
--- /dev/null
+++ b/gcc/ChangeLog.bugs
@@ -0,0 +1,5 @@
+ Branch work200-bugs, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 77d4655fc30a..344b92ce14f0 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work200 branch
+work200-bugs branch


[gcc(refs/users/meissner/heads/work200-sha)] Add ChangeLog.sha and update REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:7401b059d81534f9af028dab0d89ee039cdd9615

commit 7401b059d81534f9af028dab0d89ee039cdd9615
Author: Michael Meissner 
Date:   Mon Apr 7 09:53:34 2025 -0400

Add ChangeLog.sha and update REVISION.

2025-04-07  Michael Meissner  

gcc/

* ChangeLog.sha: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.sha | 5 +
 gcc/REVISION  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
new file mode 100644
index ..903e66de9d62
--- /dev/null
+++ b/gcc/ChangeLog.sha
@@ -0,0 +1,5 @@
+ Branch work200-sha, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 77d4655fc30a..2f460e853994 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work200 branch
+work200-sha branch


[gcc] Created branch 'meissner/heads/work200-sha' in namespace 'refs/users'

2025-04-07 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work200-sha' was created in namespace 'refs/users' 
pointing to:

 150f967ed97a... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work200-vpair)] Add ChangeLog.vpair and update REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:0cc23355ef454f27c0c593bf9e7ce16b0329e575

commit 0cc23355ef454f27c0c593bf9e7ce16b0329e575
Author: Michael Meissner 
Date:   Mon Apr 7 09:50:07 2025 -0400

Add ChangeLog.vpair and update REVISION.

2025-04-07  Michael Meissner  

gcc/

* ChangeLog.vpair: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.vpair | 5 +
 gcc/REVISION| 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
new file mode 100644
index ..43a49c4d894a
--- /dev/null
+++ b/gcc/ChangeLog.vpair
@@ -0,0 +1,5 @@
+ Branch work200-vpair, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 77d4655fc30a..73ad46cfd2b9 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work200 branch
+work200-vpair branch


[gcc] Created branch 'meissner/heads/work200-paddis' in namespace 'refs/users'

2025-04-07 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work200-paddis' was created in namespace 
'refs/users' pointing to:

 150f967ed97a... Add ChangeLog.meissner and REVISION.


[gcc r15-9255] nvptx: Support '-mfake-ptx-alloca': defer failure to run-time 'alloca' usage

2025-04-07 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:199f1abeef579912b4c40c42519825cedca6530f

commit r15-9255-g199f1abeef579912b4c40c42519825cedca6530f
Author: Thomas Schwinge 
Date:   Sun Apr 6 17:44:18 2025 +0200

nvptx: Support '-mfake-ptx-alloca': defer failure to run-time 'alloca' usage

Follow-up to commit 1146410c0feb0e82c689b1333fdf530a2b34dc2b
"nvptx: Support '-mfake-ptx-alloca'".  '-mfake-ptx-alloca' is applicable only
for configurations where PTX 'alloca' is not supported, where target libraries
are built with it enabled (that is, libstdc++, libgfortran).

This change progresses:

[-FAIL:-]{+PASS:+} g++.dg/tree-ssa/pr20458.C  -std=gnu++17 (test for 
excess errors)
[-UNRESOLVED:-]{+PASS:+} g++.dg/tree-ssa/pr20458.C  -std=gnu++17 
[-compilation failed to produce executable-]{+execution test+}
[-FAIL:-]{+PASS:+} g++.dg/tree-ssa/pr20458.C  -std=gnu++26 (test for 
excess errors)
[-UNRESOLVED:-]{+PASS:+} g++.dg/tree-ssa/pr20458.C  -std=gnu++26 
[-compilation failed to produce executable-]{+execution test+}
UNSUPPORTED: g++.dg/tree-ssa/pr20458.C  -std=gnu++98: exception 
handling not supported

..., and "enables" a few test cases:

FAIL: g++.old-deja/g++.other/sibcall1.C  -std=gnu++17 (test for excess 
errors)
[Etc.]

FAIL: g++.old-deja/g++.other/unchanging1.C  -std=gnu++17 (test for 
excess errors)
[Etc.]

..., which now (unrelatedly to 'alloca', and in the same way as 
configurations
where PTX 'alloca' is supported) FAIL due to:

unresolved symbol _Unwind_DeleteException
collect2: error: ld returned 1 exit status

Most importantly, it progresses ~830 libstdc++ test cases:

[-FAIL:-]{+PASS:+} [...] (test for excess errors)

..., with (if applicable, for most of them):

[-UNRESOLVED:-]{+PASS:+} [...] [-compilation failed to produce 
executable-]{+execution test+}

..., or just a few 'FAIL: [...] execution test' where these test cases also
FAIL in configurations where PTX 'alloca' is supported, or ~120 instances of
'FAIL: [...]  execution test' due to run-time
'GCC/nvptx: sorry, unimplemented: dynamic stack allocation not supported'.

This change also resolves the cases noted in
commit bac2d8a246892334e24dfa7d62be0cd0648c5606
"nvptx: Build libgfortran with '-mfake-ptx-alloca' [PR107635]":

| With '-mfake-ptx-alloca', libgfortran again succeeds to build, and 
compared
| to before, we've got only a small number of regressions due to nvptx 'ld'
| complaining about 'unresolved symbol 
__GCC_nvptx__PTX_alloca_not_supported':
|
| [-PASS:-]{+FAIL:+} gfortran.dg/coarray/codimension_2.f90 
-fcoarray=lib  -O2  -lcaf_single (test for excess errors)

[-FAIL:-]{+PASS:+} gfortran.dg/coarray/codimension_2.f90 -fcoarray=lib  
-O2  -lcaf_single (test for excess errors)

| [-PASS:-]{+FAIL:+} gfortran.dg/coarray/event_4.f08 -fcoarray=lib  -O2 
 -lcaf_single (test for excess errors)
| [-PASS:-]{+UNRESOLVED:+} gfortran.dg/coarray/event_4.f08 
-fcoarray=lib  -O2  -lcaf_single [-execution test-]{+compilation failed to 
produce executable+}

[-FAIL:-]{+PASS:+} gfortran.dg/coarray/event_4.f08 -fcoarray=lib  -O2  
-lcaf_single (test for excess errors)
[-UNRESOLVED:-]{+PASS:+} gfortran.dg/coarray/event_4.f08 -fcoarray=lib  
-O2  -lcaf_single [-compilation failed to produce executable-]{+execution test+}

| [-PASS:-]{+FAIL:+} gfortran.dg/coarray/fail_image_2.f08 -fcoarray=lib 
 -O2  -lcaf_single (test for excess errors)
| [-PASS:-]{+UNRESOLVED:+} gfortran.dg/coarray/fail_image_2.f08 
-fcoarray=lib  -O2  -lcaf_single [-execution test-]{+compilation failed to 
produce executable+}

[-FAIL:-]{+PASS:+} gfortran.dg/coarray/fail_image_2.f08 -fcoarray=lib  
-O2  -lcaf_single (test for excess errors)
[-UNRESOLVED:-]{+PASS:+} gfortran.dg/coarray/fail_image_2.f08 
-fcoarray=lib  -O2  -lcaf_single [-compilation failed to produce 
executable-]{+execution test+}

| [-PASS:-]{+FAIL:+} gfortran.dg/coarray/proc_pointer_assign_1.f90 
-fcoarray=lib  -O2  -lcaf_single (test for excess errors)
| [-PASS:-]{+UNRESOLVED:+} 
gfortran.dg/coarray/proc_pointer_assign_1.f90 -fcoarray=lib  -O2  -lcaf_single 
[-execution test-]{+compilation failed to produce executable+}

[-FAIL:-]{+PASS:+} gfortran.dg/coarray/proc_pointer_assign_1.f90 
-fcoarray=lib  -O2  -lcaf_single (test for excess errors)
[-UNRESOLVED:-]{+PASS:+} gfortran.dg/coarray/proc_pointer_assign_1.f90 
-fcoarray=lib  -O2  -lcaf_single [-compilation failed to produce 
executable-]{+execution test+}

| [-PASS:-]{+FAIL:+} gfortran.dg/coarray_43.f90   -O  (test for excess 
errors)

[-FAIL:-]{+PASS:+} gfortran.dg/coarray_43.f90   -O  (test for excess 
errors)

..., and further progresses:

[-FAIL:-]{+PA

[gcc] Created branch 'meissner/heads/work200-math' in namespace 'refs/users'

2025-04-07 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work200-math' was created in namespace 'refs/users' 
pointing to:

 150f967ed97a... Add ChangeLog.meissner and REVISION.


[gcc] Created branch 'meissner/heads/work200-test' in namespace 'refs/users'

2025-04-07 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work200-test' was created in namespace 'refs/users' 
pointing to:

 150f967ed97a... Add ChangeLog.meissner and REVISION.


[gcc(refs/users/meissner/heads/work200-math)] Add ChangeLog.math and update REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:6dfce1cedbdb8d0d0165897d302e6b7ca588d49f

commit 6dfce1cedbdb8d0d0165897d302e6b7ca588d49f
Author: Michael Meissner 
Date:   Mon Apr 7 09:55:50 2025 -0400

Add ChangeLog.math and update REVISION.

2025-04-07  Michael Meissner  

gcc/

* ChangeLog.math: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.math | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.math b/gcc/ChangeLog.math
new file mode 100644
index ..35170ad3c375
--- /dev/null
+++ b/gcc/ChangeLog.math
@@ -0,0 +1,5 @@
+ Branch work200-math, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 77d4655fc30a..5df57c9bdf28 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work200 branch
+work200-math branch


[gcc(refs/users/meissner/heads/work200-paddis)] Add ChangeLog.paddis and update REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b066f70c311fea6bc7c6e94ff62051b0b44ec315

commit b066f70c311fea6bc7c6e94ff62051b0b44ec315
Author: Michael Meissner 
Date:   Mon Apr 7 09:57:57 2025 -0400

Add ChangeLog.paddis and update REVISION.

2025-04-07  Michael Meissner  

gcc/

* ChangeLog.paddis: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.paddis | 5 +
 gcc/REVISION | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.paddis b/gcc/ChangeLog.paddis
new file mode 100644
index ..447bfaf2cab5
--- /dev/null
+++ b/gcc/ChangeLog.paddis
@@ -0,0 +1,5 @@
+ Branch work200-paddis, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 77d4655fc30a..4f135a7f58c9 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work200 branch
+work200-paddis branch


[gcc(refs/users/meissner/heads/work200-test)] Add ChangeLog.test and update REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:4ffec3e947f2bc351a804d389649a926c603f030

commit 4ffec3e947f2bc351a804d389649a926c603f030
Author: Michael Meissner 
Date:   Mon Apr 7 09:54:36 2025 -0400

Add ChangeLog.test and update REVISION.

2025-04-07  Michael Meissner  

gcc/

* ChangeLog.test: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.test | 5 +
 gcc/REVISION   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test
new file mode 100644
index ..aeea64ec7df6
--- /dev/null
+++ b/gcc/ChangeLog.test
@@ -0,0 +1,5 @@
+ Branch work200-test, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 77d4655fc30a..bf60953fbb17 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work200 branch
+work200-test branch


[gcc(refs/users/meissner/heads/work200-submit)] Add ChangeLog.submit and update REVISION.

2025-04-07 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:6588360b832041ee054a79efb9c332d92a26459d

commit 6588360b832041ee054a79efb9c332d92a26459d
Author: Michael Meissner 
Date:   Mon Apr 7 09:56:52 2025 -0400

Add ChangeLog.submit and update REVISION.

2025-04-07  Michael Meissner  

gcc/

* ChangeLog.submit: New file for branch.
* REVISION: Update.

Diff:
---
 gcc/ChangeLog.submit | 5 +
 gcc/REVISION | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.submit b/gcc/ChangeLog.submit
new file mode 100644
index ..e2164c8ab603
--- /dev/null
+++ b/gcc/ChangeLog.submit
@@ -0,0 +1,5 @@
+ Branch work200-submit, baseline 
+
+2025-04-07   Michael Meissner  
+
+   Clone branch
diff --git a/gcc/REVISION b/gcc/REVISION
index 77d4655fc30a..3bed4d9d4b31 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-work200 branch
+work200-submit branch


[gcc r15-9250] sra: Avoid creating TBAA hazards (PR118924)

2025-04-07 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:07d243670020b339380194f6125cde87ada56148

commit r15-9250-g07d243670020b339380194f6125cde87ada56148
Author: Martin Jambor 
Date:   Mon Apr 7 13:32:09 2025 +0200

sra: Avoid creating TBAA hazards (PR118924)

The testcase in PR 118924, when compiled on Aarch64, contains a
gimple aggregate assignment statement in between different types which
are types_compatible_p but behave differently for the purposes of
alias analysis.

SRA replaces the statement with a series of scalar assignments which,
however, have LHS access chains modeled on the RHS type and so do not
alias with subsequent reads and so are DSEd.

SRA clearly gets its "same_access_path" logic subtly wrong.  One issue
is that the same_access_path_p function probably should be implemented
more along the lines of (parts of ao_compare::compare_ao_refs) instead
of internally relying on operand_equal_p.  That is however not the
problem in the PR and so I will deal with it only later.

The issue here is that even when the access path is the same, it must
not be bolted on an aggregate type that does not match.  This patch
does that, taking just one simple function from the
ao_compare::compare_ao_refs machinery and using it to detect the
situation.  The rest is just merging the information in between
accesses of the same access group.

I looked at how many times we come across such assignment during
"make stage2-bubble" of GCC (configured with only c and C++ and
without multilib and libsanitizers) and on an x86_64 there were 87924
such assignments (though now I realize not all of them had to be
aggregate), so they do happen.  The patch leads to about 5% increase
of cases where we don't use an "access path" but resort to a
MEM_REF (from 90209 to 95204).  On an Aarch64, there were 92268 such
assignments and the increase of falling back to MEM_REFs was by
4% (but from a bigger base 132983 to 107991).

gcc/ChangeLog:

2025-04-04  Martin Jambor  

PR tree-optimization/118924
* tree-ssa-alias-compare.h (types_equal_for_same_type_for_tbaa_p):
Declare.
* tree-ssa-alias.cc: Include ipa-utils.h.
(types_equal_for_same_type_for_tbaa_p): New public overloaded 
variant.
* tree-sra.cc: Include tree-ssa-alias-compare.h.
(create_access): Initialize grp_same_access_path to true.
(build_accesses_from_assign): Detect tbaa hazards and clear
grp_same_access_path fields of involved accesses when they occur.
(sort_and_splice_var_accesses): Take previous values of
grp_same_access_path into account.

gcc/testsuite/ChangeLog:

2025-03-25  Martin Jambor  

PR tree-optimization/118924
* g++.dg/tree-ssa/pr118924.C: New test.

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/pr118924.C | 29 +
 gcc/tree-sra.cc  | 17 ++---
 gcc/tree-ssa-alias-compare.h |  2 ++
 gcc/tree-ssa-alias.cc| 13 -
 4 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr118924.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr118924.C
new file mode 100644
index ..c95eacafc9ce
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr118924.C
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-options "-std=c++17 -O2" } */
+
+template  struct Vector {
+  int m_data[Size];
+  Vector(int, int, int) {}
+};
+enum class E { POINTS, LINES, TRIANGLES };
+
+__attribute__((noipa))
+void getName(E type) {
+  static E check = E::POINTS;
+  if (type == check)
+check = (E)((int)check + 1);
+  else
+__builtin_abort ();
+}
+
+int main() {
+  int arr[]{0, 1, 2};
+  for (auto dim : arr) {
+Vector<3> localInvs(1, 1, 1);
+localInvs.m_data[dim] = 8;
+  }
+  E types[] = {E::POINTS, E::LINES, E::TRIANGLES};
+  for (auto primType : types)
+getName(primType);
+  return 0;
+}
diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc
index c26559edc666..ae7cd57a5f23 100644
--- a/gcc/tree-sra.cc
+++ b/gcc/tree-sra.cc
@@ -100,6 +100,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "tree-sra.h"
 #include "opts.h"
+#include "tree-ssa-alias-compare.h"
 
 /* Enumeration of all aggregate reductions we can do.  */
 enum sra_mode { SRA_MODE_EARLY_IPA,   /* early call regularization */
@@ -979,6 +980,7 @@ create_access (tree expr, gimple *stmt, bool write)
   access->type = TREE_TYPE (expr);
   access->write = write;
   access->grp_unscalarizable_region = unscalarizable_region;
+  access->grp_same_access_path = true;
   access->stmt = stmt;
   access->reverse = reverse;
 
@@ -1522,6 +1524,9 @@ build_accesses_from_assign (gimple *stmt)
   racc = build_access_from_expr_1 (rhs, stmt, false);
   lacc = build_access_from_expr_1 (lhs, stmt,

[gcc r15-9251] sra: Clear grp_same_access_path of accesses created by total scalarization (PR118924)

2025-04-07 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:40445711b8af113ef423d8bcac1a7ce1c47f62d7

commit r15-9251-g40445711b8af113ef423d8bcac1a7ce1c47f62d7
Author: Martin Jambor 
Date:   Mon Apr 7 13:32:10 2025 +0200

sra: Clear grp_same_access_path of accesses created by total scalarization
(PR118924)

During analysis of PR 118924 it was discussed that total scalarization
invents access paths (strings of COMPONENT_REFs and possibly even
ARRAY_REFs) which did not exist in the program before which can have
unintended effects on subsequent AA queries.  Although not doing that
does not mean that SRA cannot create such situations (see the bug for
more info), it has been agreed that not doing this is generally better.
This patch therefore makes SRA fall back on creating simple MEM_REFs when
accessing components of an aggregate corresponding to what a SRA
variable now represents.

gcc/ChangeLog:

2025-03-26  Martin Jambor  

PR tree-optimization/118924
* tree-sra.cc (create_total_scalarization_access): Set
grp_same_access_path flag to zero.

Diff:
---
 gcc/tree-sra.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc
index ae7cd57a5f23..302b73e83b8f 100644
--- a/gcc/tree-sra.cc
+++ b/gcc/tree-sra.cc
@@ -3462,7 +3462,7 @@ create_total_scalarization_access (struct access *parent, 
HOST_WIDE_INT pos,
   access->grp_write = parent->grp_write;
   access->grp_total_scalarization = 1;
   access->grp_hint = 1;
-  access->grp_same_access_path = path_comparable_for_same_access (expr);
+  access->grp_same_access_path = 0;
   access->reverse = reverse_storage_order_for_component_p (expr);
 
   access->next_sibling = next_sibling;


[gcc(refs/users/alfierichards/heads/fmv_c)] c: Add target_version attribute support.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:2a3abe41cbd4b8d6b509b72f086ff1b58fb285de

commit 2a3abe41cbd4b8d6b509b72f086ff1b58fb285de
Author: Alfie Richards 
Date:   Wed Apr 2 13:37:02 2025 +

c: Add target_version attribute support.

This commit introduces support for the target_version attribute in the c
frontend, following the behavior defined in the Arm C Language Extension.

Key changes include:

- During pushdecl, the compiler now checks whether the current symbol is
  part of a multiversioned set.
  - New versions are added to the function multiversioning (FMV) set, and 
the
symbol binding is updated to include the default version (if present).
This means the binding for a multiversioned symbol will always reference
the default version (if present), as it defines the scope and signature
for the entire set.
  - Pre-existing versions are merged with their previous version (or 
diagnosed).
- Lookup logic is adjusted to prevent resolving non-default versions.
- start_decl and start_function are updated to handle marking and mangling 
of
  versioned functions.
- c_parse_final_cleanups now includes a call to process_same_body_aliases.
  This has no functional impact other than setting cpp_implicit_aliases_done
  on all nodes, which is necessary for certain shared FMV logic.

gcc/c/ChangeLog:

* c-decl.cc (maybe_mark_function_versioned): New function.
(merge_decls): Preserve DECL_FUNCTION_VERSIONED in merging.
(duplicate_decls): Add check and diagnostic for unmergeable version decls.
(pushdecl): Add FMV target_version logic.
(lookup_name): Don't resolve non-default versions.
(start_decl): Mark and mangle versioned functions.
(start_function): Mark and mangle versioned functions.
(c_parse_final_cleanups): Add call to process_same_body_aliases.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/mv-1.c: New test.
* gcc.target/aarch64/mv-and-mvc1.c: New test.
* gcc.target/aarch64/mv-and-mvc2.c: New test.
* gcc.target/aarch64/mv-and-mvc3.c: New test.
* gcc.target/aarch64/mv-and-mvc4.c: New test.
* gcc.target/aarch64/mv-symbols1.c: New test.
* gcc.target/aarch64/mv-symbols10.c: New test.
* gcc.target/aarch64/mv-symbols11.c: New test.
* gcc.target/aarch64/mv-symbols12.c: New test.
* gcc.target/aarch64/mv-symbols13.c: New test.
* gcc.target/aarch64/mv-symbols2.c: New test.
* gcc.target/aarch64/mv-symbols3.c: New test.
* gcc.target/aarch64/mv-symbols4.c: New test.
* gcc.target/aarch64/mv-symbols5.c: New test.
* gcc.target/aarch64/mv-symbols6.c: New test.
* gcc.target/aarch64/mv-symbols7.c: New test.
* gcc.target/aarch64/mv-symbols8.c: New test.
* gcc.target/aarch64/mv-symbols9.c: New test.
* gcc.target/aarch64/mvc-symbols1.c: New test.
* gcc.target/aarch64/mvc-symbols2.c: New test.
* gcc.target/aarch64/mvc-symbols3.c: New test.
* gcc.target/aarch64/mvc-symbols4.c: New test.

Diff:
---
 gcc/c/c-decl.cc | 117 
 gcc/testsuite/gcc.target/aarch64/mv-1.c |  43 +
 gcc/testsuite/gcc.target/aarch64/mv-and-mvc1.c  |  37 
 gcc/testsuite/gcc.target/aarch64/mv-and-mvc2.c  |  28 ++
 gcc/testsuite/gcc.target/aarch64/mv-and-mvc3.c  |  40 
 gcc/testsuite/gcc.target/aarch64/mv-and-mvc4.c  |  37 
 gcc/testsuite/gcc.target/aarch64/mv-symbols1.c  |  38 
 gcc/testsuite/gcc.target/aarch64/mv-symbols10.c |  42 +
 gcc/testsuite/gcc.target/aarch64/mv-symbols11.c |  16 
 gcc/testsuite/gcc.target/aarch64/mv-symbols12.c |  27 ++
 gcc/testsuite/gcc.target/aarch64/mv-symbols13.c |  28 ++
 gcc/testsuite/gcc.target/aarch64/mv-symbols2.c  |  28 ++
 gcc/testsuite/gcc.target/aarch64/mv-symbols3.c  |  27 ++
 gcc/testsuite/gcc.target/aarch64/mv-symbols4.c  |  31 +++
 gcc/testsuite/gcc.target/aarch64/mv-symbols5.c  |  36 
 gcc/testsuite/gcc.target/aarch64/mv-symbols6.c  |  20 
 gcc/testsuite/gcc.target/aarch64/mv-symbols7.c  |  47 ++
 gcc/testsuite/gcc.target/aarch64/mv-symbols8.c  |  47 ++
 gcc/testsuite/gcc.target/aarch64/mv-symbols9.c  |  44 +
 gcc/testsuite/gcc.target/aarch64/mvc-symbols1.c |  25 +
 gcc/testsuite/gcc.target/aarch64/mvc-symbols2.c |  15 +++
 gcc/testsuite/gcc.target/aarch64/mvc-symbols3.c |  19 
 gcc/testsuite/gcc.target/aarch64/mvc-symbols4.c |  12 +++
 23 files changed, 804 insertions(+)

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index c778c7febfa0..977e6ef37878 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -2087,6 +2087,29 @@ previous_tag (tree type)
   return NULL_TREE;
 }
 
+/* Subr

[gcc(refs/users/alfierichards/heads/fmv_c)] c: Improve diagnostics for C FMV declaration conflicts.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:2061ee1fbd17cadf7b840e03c6e881d0c508a296

commit 2061ee1fbd17cadf7b840e03c6e881d0c508a296
Author: Alfie Richards 
Date:   Wed Apr 2 14:24:00 2025 +0000

c: Improve diagnostics for C FMV declaration conflicts.

Improves diagnostic messages for conflicting function multiversioning (FMV)
declarations using target_version and/or target_clones attributes.

Conflict errors now include the overlapping version string (if relevant),
making it easier to identify and resolve declaration mismatches.

gcc/c/ChangeLog:

* c-decl.cc (diagnose_mismatched_decls): Add conflicting_ver argument
and update diagnostics with it.
(duplicate_decls): Ditto.
(pushdecl): Add conflicting_version variable and update logic to use it.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/mv-and-mvc-error1.c: New test.
* gcc.target/aarch64/mv-and-mvc-error2.c: New test.
* gcc.target/aarch64/mv-and-mvc-error3.c: New test.
* gcc.target/aarch64/mv-error1.c: New test.
* gcc.target/aarch64/mv-error2.c: New test.
* gcc.target/aarch64/mv-error3.c: New test.
* gcc.target/aarch64/mv-error4.c: New test.
* gcc.target/aarch64/mv-error5.c: New test.
* gcc.target/aarch64/mv-error6.c: New test.
* gcc.target/aarch64/mv-error7.c: New test.
* gcc.target/aarch64/mv-error8.c: New test.
* gcc.target/aarch64/mv-error9.c: New test.
* gcc.target/aarch64/mvc-error1.c: New test.
* gcc.target/aarch64/mvc-error2.c: New test.
* gcc.target/aarch64/mvc-warning1.c: New test.

Diff:
---
 gcc/c/c-decl.cc| 37 --
 .../gcc.target/aarch64/mv-and-mvc-error1.c |  9 ++
 .../gcc.target/aarch64/mv-and-mvc-error2.c |  9 ++
 .../gcc.target/aarch64/mv-and-mvc-error3.c |  8 +
 gcc/testsuite/gcc.target/aarch64/mv-error1.c   | 18 +++
 gcc/testsuite/gcc.target/aarch64/mv-error2.c   |  9 ++
 gcc/testsuite/gcc.target/aarch64/mv-error3.c   | 12 +++
 gcc/testsuite/gcc.target/aarch64/mv-error4.c   |  9 ++
 gcc/testsuite/gcc.target/aarch64/mv-error5.c   |  8 +
 gcc/testsuite/gcc.target/aarch64/mv-error6.c   | 20 
 gcc/testsuite/gcc.target/aarch64/mv-error7.c   | 11 +++
 gcc/testsuite/gcc.target/aarch64/mv-error8.c   | 12 +++
 gcc/testsuite/gcc.target/aarch64/mv-error9.c   | 12 +++
 gcc/testsuite/gcc.target/aarch64/mvc-error1.c  |  9 ++
 gcc/testsuite/gcc.target/aarch64/mvc-error2.c  |  9 ++
 gcc/testsuite/gcc.target/aarch64/mvc-warning1.c| 13 
 16 files changed, 195 insertions(+), 10 deletions(-)

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 977e6ef37878..915743eb5ec3 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -2119,8 +2119,8 @@ maybe_mark_function_versioned (tree decl)
TREE_TYPE (NEWDECL, OLDDECL) respectively.  */
 
 static bool
-diagnose_mismatched_decls (tree newdecl, tree olddecl,
-  tree *newtypep, tree *oldtypep)
+diagnose_mismatched_decls (tree newdecl, tree olddecl, tree *newtypep,
+  tree *oldtypep, string_slice *conflicting_ver = NULL)
 {
   tree newtype, oldtype;
   bool retval = true;
@@ -2448,7 +2448,12 @@ diagnose_mismatched_decls (tree newdecl, tree olddecl,
DECL_ATTRIBUTES (newdecl)
{
  auto_diagnostic_group d;
- error ("redefinition of %q+D", newdecl);
+ if (conflicting_ver && conflicting_ver->is_valid ())
+   error ("redefinition of %qB version for %q+D",
+  conflicting_ver,
+  newdecl);
+ else
+   error ("redefinition of %q+D", newdecl);
  locate_old_decl (olddecl);
  return false;
}
@@ -3188,20 +3193,29 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, 
tree oldtype)
true.  Otherwise, return false.  */
 
 static bool
-duplicate_decls (tree newdecl, tree olddecl)
+duplicate_decls (tree newdecl, tree olddecl,
+string_slice *conflicting_ver = NULL)
 {
   tree newtype = NULL, oldtype = NULL;
 
   if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE
+  && TREE_CODE (olddecl) == FUNCTION_DECL
+  && TREE_CODE (newdecl) == FUNCTION_DECL
   && !mergeable_version_decls (olddecl, newdecl))
 {
   auto_diagnostic_group d;
-  error ("conflicting versioned declarations of %q+D", newdecl);
+  if (conflicting_ver && conflicting_ver->is_valid ())
+   error ("conflicting %qB version declarations of %q+D",
+  conflicting_ver,
+  newdecl);
+  else
+   error ("conflicting versioned declarations of %q+D", new

[gcc r15-9259] libstdc++: Add new headers to <bits/stdc++.h> for PCH

2025-04-07 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:107f8de8fb5004bbf219c47b60dfd721762437d2

commit r15-9259-g107f8de8fb5004bbf219c47b60dfd721762437d2
Author: Jonathan Wakely 
Date:   Mon Apr 7 11:15:03 2025 +0100

libstdc++: Add new headers to <bits/stdc++.h> for PCH

This adds the new C23 headers to the PCH, and also removes the
__has_include check for <stacktrace> because we provide that
unconditionally now.

libstdc++-v3/ChangeLog:

* include/precompiled/stdc++.h: Include <stdbit.h> and
<stdckdint.h>. Include <stacktrace> unconditionally.

Diff:
---
 libstdc++-v3/include/precompiled/stdc++.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/precompiled/stdc++.h 
b/libstdc++-v3/include/precompiled/stdc++.h
index 1ffde3ed450c..f4b312d9e470 100644
--- a/libstdc++-v3/include/precompiled/stdc++.h
+++ b/libstdc++-v3/include/precompiled/stdc++.h
@@ -230,15 +230,15 @@
 #include 
 #include 
 #include 
-#if __has_include()
-# include 
-#endif
+#include 
 #include 
 #include 
 #endif
 
 #if __cplusplus > 202302L
 #include 
+#include 
+#include 
 #endif
 
 #endif // HOSTED


[gcc r15-9260] libstdc++: Remove stray pragma in new header [PR119642]

2025-04-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:3b1cf36eb47e217b6f093abd9f76ea358d456e2e

commit r15-9260-g3b1cf36eb47e217b6f093abd9f76ea358d456e2e
Author: Jonathan Wakely 
Date:   Mon Apr 7 11:30:41 2025 +0100

libstdc++: Remove stray pragma in new header [PR119642]

libstdc++-v3/ChangeLog:

PR libstdc++/119642
* include/bits/formatfwd.h: Remove stray pragma.

Diff:
---
 libstdc++-v3/include/bits/formatfwd.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/formatfwd.h 
b/libstdc++-v3/include/bits/formatfwd.h
index 44922cb83fc7..a6b5ac8c8ce1 100644
--- a/libstdc++-v3/include/bits/formatfwd.h
+++ b/libstdc++-v3/include/bits/formatfwd.h
@@ -67,5 +67,4 @@ namespace __format
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std
 #endif // __glibcxx_format
-#pragma GCC diagnostic pop
 #endif // _GLIBCXX_FORMAT_FWD_H


[gcc r15-9258] libstdc++: Add new headers to Doxygen config file

2025-04-07 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:5430fcd1a3222d62c1b9560de251268c8bc50303

commit r15-9258-g5430fcd1a3222d62c1b9560de251268c8bc50303
Author: Jonathan Wakely 
Date:   Thu Apr 3 15:36:08 2025 +0100

libstdc++: Add new headers to Doxygen config file

libstdc++-v3/ChangeLog:

* doc/doxygen/user.cfg.in (INPUT): Add flat_map, flat_set,
text_encoding, stdbit.h and stdckdint.h.

Diff:
---
 libstdc++-v3/doc/doxygen/user.cfg.in | 5 +
 1 file changed, 5 insertions(+)

diff --git a/libstdc++-v3/doc/doxygen/user.cfg.in 
b/libstdc++-v3/doc/doxygen/user.cfg.in
index ae50f6dd0c74..19ae67a67bac 100644
--- a/libstdc++-v3/doc/doxygen/user.cfg.in
+++ b/libstdc++-v3/doc/doxygen/user.cfg.in
@@ -861,6 +861,8 @@ INPUT  = @srcdir@/doc/doxygen/doxygroups.cc 
\
  include/deque \
  include/expected \
  include/filesystem \
+ include/flat_map \
+ include/flat_set \
  include/forward_list \
  include/format \
  include/fstream \
@@ -906,6 +908,7 @@ INPUT  = @srcdir@/doc/doxygen/doxygroups.cc 
\
  include/string_view \
  include/syncstream \
  include/system_error \
+ include/text_encoding \
  include/thread \
  include/tuple \
  include/typeindex \
@@ -942,6 +945,8 @@ INPUT  = @srcdir@/doc/doxygen/doxygroups.cc 
\
  include/cwchar \
  include/cuchar \
  include/cwctype \
+ include/stdbit.h \
+ include/stdckdint.h \
  include/ \
  include/bits \
  include/@host_alias@/bits \


[gcc r15-9261] aarch64, Darwin: Initial implementation of Apple cores [PR113257].

2025-04-07 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:3a77a567b1028a28ecbb2f2eadc351d8bd004352

commit r15-9261-g3a77a567b1028a28ecbb2f2eadc351d8bd004352
Author: Iain Sandoe 
Date:   Sun Oct 15 10:19:22 2023 +0100

aarch64, Darwin: Initial implementation of Apple cores [PR113257].

After discussion with the open source support team at Apple, we have
established that the cores conform to the 8.5 and 8.6 requirements.
One of the mandatory features (FEAT_SPECRES) is not exposed (or
available) in user-space code but is supported for privileged code.

The values for chip IDs and the LITTLE.big variants have been taken
from lists in the XNU and LLVM sources.

PR target/113257

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (AARCH64_CORE): Add Apple-a12,
Apple-M1, Apple-M2, Apple-M3 with expanded names to allow for the
LITTLE.big versions.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi: Add apple-m1,2 and 3 cores to the ones listed
for arch and tune selections.

Signed-off-by: Iain Sandoe 

Diff:
---
 gcc/config/aarch64/aarch64-cores.def | 16 
 gcc/config/aarch64/aarch64-tune.md   |  2 +-
 gcc/doc/invoke.texi  |  6 --
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 0e22d72976ef..7f204fd0ac92 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -173,6 +173,22 @@ AARCH64_CORE("cortex-a76.cortex-a55",  cortexa76cortexa55, 
cortexa53, V8_2A,  (F
 AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), cortexa53, 0x41, 
0xd15, -1)
 AARCH64_CORE("cortex-r82ae", cortexr82ae, cortexa53, V8R, (), cortexa53, 0x41, 
0xd14, -1)
 
+/* Apple (A12 and M) cores.
+   Known part numbers as listed in other public sources.
+   Placeholders for schedulers, generic_armv8_a for costs.
+   A12 seems mostly 8.3, M1 is 8.5 without BTI, M2 and M3 are 8.6
+   From measurements made so far the odd-number core IDs are performance.  */
+AARCH64_CORE("apple-a12", applea12, cortexa53, V8_3A,  (), generic_armv8_a, 
0x61, 0x12, -1)
+AARCH64_CORE("apple-m1", applem1_0, cortexa57, V8_5A,  (), generic_armv8_a, 
0x61, AARCH64_BIG_LITTLE (0x21, 0x20), -1)
+AARCH64_CORE("apple-m1", applem1_1, cortexa57, V8_5A,  (), generic_armv8_a, 
0x61, AARCH64_BIG_LITTLE (0x23, 0x22), -1)
+AARCH64_CORE("apple-m1", applem1_2, cortexa57, V8_5A,  (), generic_armv8_a, 
0x61, AARCH64_BIG_LITTLE (0x25, 0x24), -1)
+AARCH64_CORE("apple-m1", applem1_3, cortexa57, V8_5A,  (), generic_armv8_a, 
0x61, AARCH64_BIG_LITTLE (0x29, 0x28), -1)
+AARCH64_CORE("apple-m2", applem2_0, cortexa57, V8_6A,  (), generic_armv8_a, 
0x61, AARCH64_BIG_LITTLE (0x31, 0x30), -1)
+AARCH64_CORE("apple-m2", applem2_1, cortexa57, V8_6A,  (), generic_armv8_a, 
0x61, AARCH64_BIG_LITTLE (0x33, 0x32), -1)
+AARCH64_CORE("apple-m2", applem2_2, cortexa57, V8_6A,  (), generic_armv8_a, 
0x61, AARCH64_BIG_LITTLE (0x35, 0x34), -1)
+AARCH64_CORE("apple-m2", applem2_3, cortexa57, V8_6A,  (), generic_armv8_a, 
0x61, AARCH64_BIG_LITTLE (0x39, 0x38), -1)
+AARCH64_CORE("apple-m3", applem3_0, cortexa57, V8_6A,  (), generic_armv8_a, 
0x61, AARCH64_BIG_LITTLE (0x49, 0x48), -1)
+
 /* Armv9.0-A Architecture Processors.  */
 
 /* Arm ('A') cores. */
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 56a914f12b9c..982074c2c21e 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,generic,generic_armv8_a,generic_armv9_a"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99

[gcc r14-11535] c++: constinit and value-initialization [PR119652]

2025-04-07 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:749221fac55059a145d3fdaf416fe663a4ee33d9

commit r14-11535-g749221fac55059a145d3fdaf416fe663a4ee33d9
Author: Jason Merrill 
Date:   Mon Apr 7 11:49:19 2025 -0400

c++: constinit and value-initialization [PR119652]

Value-initialization built an AGGR_INIT_EXPR to set AGGR_INIT_ZERO_FIRST on.
Passing that AGGR_INIT_EXPR to maybe_constant_value returned a TARGET_EXPR,
which potential_constant_expression_1 mistook for a temporary.

We shouldn't add a TARGET_EXPR to the AGGR_INIT_EXPR in this case, just like
we already avoid adding it to CONSTRUCTOR or CALL_EXPR.

PR c++/119652

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_outermost_constant_expr): Also don't add a
TARGET_EXPR around AGGR_INIT_EXPR.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/constinit20.C: New test.

(cherry picked from commit c7dc9b6f889fa8f9e4ef060c3af107eaf54265c5)

Diff:
---
 gcc/cp/constexpr.cc  |  3 ++-
 gcc/testsuite/g++.dg/cpp2a/constinit20.C | 18 ++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 2da5ddfffe37..14e4bf7f03a5 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -9034,7 +9034,8 @@ cxx_eval_outermost_constant_expr (tree t, bool 
allow_non_constant,
   if (TREE_CODE (t) == TARGET_EXPR
  && TARGET_EXPR_INITIAL (t) == r)
return t;
-  else if (TREE_CODE (t) == CONSTRUCTOR || TREE_CODE (t) == CALL_EXPR)
+  else if (TREE_CODE (t) == CONSTRUCTOR || TREE_CODE (t) == CALL_EXPR
+  || TREE_CODE (t) == AGGR_INIT_EXPR)
/* Don't add a TARGET_EXPR if our argument didn't have one.  */;
   else if (TREE_CODE (t) == TARGET_EXPR && TARGET_EXPR_CLEANUP (t))
r = get_target_expr (r);
diff --git a/gcc/testsuite/g++.dg/cpp2a/constinit20.C 
b/gcc/testsuite/g++.dg/cpp2a/constinit20.C
new file mode 100644
index 000000000000..9b043917dc36
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/constinit20.C
@@ -0,0 +1,18 @@
+// PR c++/119652
+// { dg-do compile { target c++20 } }
+
+struct __shared_count {
+  constexpr __shared_count() {}
+  ~__shared_count();
+  int _M_pi = 0;
+};
+struct shared_ptr {
+  __shared_count _M_refcount;
+};
+struct A {
+  A() = default;
+  shared_ptr m;
+};
+constinit A a;
+constinit A b {};
+constinit A c = {};


[gcc r15-9256] GCN, nvptx libstdc++: Force use of '__atomic' builtins [PR119645]

2025-04-07 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:059b5509c14904b55c37f659170240ae0d2c1c8e

commit r15-9256-g059b5509c14904b55c37f659170240ae0d2c1c8e
Author: Thomas Schwinge 
Date:   Sat Apr 5 23:11:23 2025 +0200

GCN, nvptx libstdc++: Force use of '__atomic' builtins [PR119645]

For both GCN and nvptx, this gets rid of the 'configure'-time warnings:

configure: WARNING: No native atomic operations are provided for this platform.
configure: WARNING: They will be faked using a mutex.
configure: WARNING: Performance of certain classes will degrade as a result.

..., and changes:

-checking for lock policy for shared_ptr reference counts... mutex
+checking for lock policy for shared_ptr reference counts... atomic

That means, '[...]/[target]/libstdc++-v3/', 'Makefile's change:

-ATOMICITY_SRCDIR = config/cpu/generic/atomicity_mutex
+ATOMICITY_SRCDIR = config/cpu/generic/atomicity_builtins

..., and '[...]/[target]/libstdc++-v3/config.h' changes:

/* Defined if shared_ptr reference counting should use atomic 
operations. */
-/* #undef HAVE_ATOMIC_LOCK_POLICY */
+#define HAVE_ATOMIC_LOCK_POLICY 1

/* Define if the compiler supports C++11 atomics. */
-/* #undef _GLIBCXX_ATOMIC_BUILTINS */
+#define _GLIBCXX_ATOMIC_BUILTINS 1

..., and '[...]/[target]/libstdc++-v3/include/[target]/bits/c++config.h'
changes:

/* Defined if shared_ptr reference counting should use atomic 
operations. */
-/* #undef _GLIBCXX_HAVE_ATOMIC_LOCK_POLICY */
+#define _GLIBCXX_HAVE_ATOMIC_LOCK_POLICY 1

/* Define if the compiler supports C++11 atomics. */
-/* #undef _GLIBCXX_ATOMIC_BUILTINS */
+#define _GLIBCXX_ATOMIC_BUILTINS 1

This means that '[...]/[target]/libstdc++-v3/libsupc++/atomicity.cc',
'[...]/[target]/libstdc++-v3/libsupc++/atomicity.o' then uses atomic
instructions for synchronization instead of C++ static local variables, 
which
in turn for their guard variables, via 'libstdc++-v3/libsupc++/guard.cc', 
used
'libgcc/gthr.h' recursive mutexes, which currently are unsupported for GCN.

For GCN, this turns ~500 libstdc++ execution test FAILs into PASSes, and 
also
progresses:

PASS: g++.dg/tree-ssa/pr20458.C  -std=gnu++17 (test for excess errors)
[-FAIL:-]{+PASS:+} g++.dg/tree-ssa/pr20458.C  -std=gnu++17 execution 
test
PASS: g++.dg/tree-ssa/pr20458.C  -std=gnu++26 (test for excess errors)
[-FAIL:-]{+PASS:+} g++.dg/tree-ssa/pr20458.C  -std=gnu++26 execution 
test
UNSUPPORTED: g++.dg/tree-ssa/pr20458.C  -std=gnu++98: exception 
handling not supported

(For nvptx, there is no effective change, due to other misconfiguration.)

PR target/119645
libstdc++-v3/
* acinclude.m4 (GLIBCXX_ENABLE_LOCK_POLICY) [GCN, nvptx]:
Hard-code results.
* configure: Regenerate.
* configure.host [GCN, nvptx] (atomicity_dir): Set to
'cpu/generic/atomicity_builtins'.

Diff:
---
 libstdc++-v3/acinclude.m4   |  7 ---
 libstdc++-v3/configure  | 11 ++-
 libstdc++-v3/configure.host | 11 +++
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 02fd349e11df..a0094c2dd95b 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4023,10 +4023,11 @@ AC_DEFUN([GLIBCXX_ENABLE_LOCK_POLICY], [
 dnl Why don't we check 8-byte CAS for sparc64, where _Atomic_word is long?!
 dnl New targets should only check for CAS for the _Atomic_word type.
 AC_TRY_COMPILE([
-#if defined __riscv
+#if defined __AMDGCN__ || defined __nvptx__
+/* Yes, please.  */
+#elif defined __riscv
 # error "Defaulting to mutex-based locks for ABI compatibility"
-#endif
-#if ! defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
+#elif ! defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
 # error "No 2-byte compare-and-swap"
 #elif ! defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
 # error "No 4-byte compare-and-swap"
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 56d0bcb297ea..819a1d82876a 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -16394,10 +16394,11 @@ ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
-#if defined __riscv
+#if defined __AMDGCN__ || defined __nvptx__
+/* Yes, please.  */
+#elif defined __riscv
 # error "Defaulting to mutex-based locks for ABI compatibility"
-#endif
-#if ! defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
+#elif ! defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
 # error "No 2-byte compare-and-swap"
 #elif ! defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
 # error "No 4-byte compare-and-swap"
@@ -16444,7 +16445,7 @@ $as_echo "mu

[gcc r15-9253] cobol: Fix up update_web_docs_git for COBOL [PR119227]

2025-04-07 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:5d3b2f07b13682e49b18b7b06797d4965a600b56

commit r15-9253-g5d3b2f07b13682e49b18b7b06797d4965a600b56
Author: Jakub Jelinek 
Date:   Mon Apr 7 13:53:20 2025 +0200

cobol: Fix up update_web_docs_git for COBOL [PR119227]

As mentioned in the PR, the COBOL documentation is currently not present
in onlinedocs at all.
While the script generates gcobol{,_io}.{pdf,html}, it generates them in
the gcc/gcc/cobol/ subdirectory of the update_web_docs_git temporary
directory and nothing finds them there afterwards; all the processing is on
for file in */*.html *.ps *.pdf *.tar; do
So, this patch puts gcobol{,_io}.html into the gcobol/ subdirectory and
gcobol{,_io}.pdf into the current directory, so that they are picked up.
With this it makes it into onlinedocs:
find . -name \*cobol\*
./onlinedocs/gcobol.pdf.gz
./onlinedocs/gcobol.pdf
./onlinedocs/gcobol_io.pdf.gz
./onlinedocs/gcobol_io.pdf
./onlinedocs/gcobol
./onlinedocs/gcobol/gcobol_io.html.gz
./onlinedocs/gcobol/gcobol_io.html
./onlinedocs/gcobol/gcobol.html.gz
./onlinedocs/gcobol/gcobol.html

./onlinedocs/gnat_rm/gnat_005frm_002finterfacing_005fto_005fother_005flanguages-interfacing-to-cobol.html.gz

./onlinedocs/gnat_rm/gnat_005frm_002finterfacing_005fto_005fother_005flanguages-interfacing-to-cobol.html

./onlinedocs/gnat_rm/gnat_005frm_002fimplementation_005fadvice-rm-f-7-cobol-support.html.gz

./onlinedocs/gnat_rm/gnat_005frm_002fimplementation_005fadvice-rm-f-7-cobol-support.html

./onlinedocs/gnat_rm/gnat_005frm_002fimplementation_005fadvice-rm-b-4-95-98-interfacing-with-cobol.html.gz

./onlinedocs/gnat_rm/gnat_005frm_002fimplementation_005fadvice-rm-b-4-95-98-interfacing-with-cobol.html

2025-04-07  Jakub Jelinek  

PR web/119227
* update_web_docs_git: Rename mdoc2pdf_html to cobol_mdoc2pdf_html,
perform mkdir -p $DOCSDIR/gcobol gcobol, remove $d/ from pdf and in
html replace it with gcobol/; update uses of the renamed function.

Diff:
---
 maintainer-scripts/update_web_docs_git | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/maintainer-scripts/update_web_docs_git 
b/maintainer-scripts/update_web_docs_git
index 574397cec54b..424808847b6b 100755
--- a/maintainer-scripts/update_web_docs_git
+++ b/maintainer-scripts/update_web_docs_git
@@ -205,11 +205,12 @@ done
 #
 # The COBOL FE maintains man pages.  Convert them to HTML and PDF.
 #
-mdoc2pdf_html() {
+cobol_mdoc2pdf_html() {
+mkdir -p $DOCSDIR/gcobol gcobol
 input="$1"
 d="${input%/*}"
-pdf="$d/$2"
-html="$d/$3"
+pdf="$2"
+html="gcobol/$3"
 groff -mdoc -T pdf "$input" > "${pdf}~"
 mv "${pdf}~" "${pdf}"
 mandoc -T html "$filename" > "${html}~"
@@ -221,10 +222,10 @@ find . -name gcobol.[13] |
 do
 case ${filename##*.} in
 1)
-mdoc2pdf_html "$filename" gcobol.pdf gcobol.html
+cobol_mdoc2pdf_html "$filename" gcobol.pdf gcobol.html
 ;;
 3)
-mdoc2pdf_html "$filename" gcobol_io.pdf gcobol_io.html
+cobol_mdoc2pdf_html "$filename" gcobol_io.pdf gcobol_io.html
 ;;
 esac
 done


[gcc r15-9252] cobol: Fix up make html for COBOL [PR119227]

2025-04-07 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:5b9bd0b0cd4bd23b7906ea2f3e9c2b1583d53554

commit r15-9252-g5b9bd0b0cd4bd23b7906ea2f3e9c2b1583d53554
Author: Jakub Jelinek 
Date:   Mon Apr 7 13:52:28 2025 +0200

cobol: Fix up make html for COBOL [PR119227]

What make html does for COBOL is quite inconsistent with all
other FEs.  Normally make html creates HTML/gcc-15.0.1/
subdirectory and puts there subdirectories like gcc, cpp, gccint, gfortran
etc. and only those contain *.html files.  COBOL puts gcobol.html and
gcobol-io.html into the current directory instead.

The following patch puts them into $(build_htmldir)/gcobol/ directory.

2025-04-07  Jakub Jelinek  

PR web/119227
* Make-lang.in (GCOBOL_HTML_FILES): New variable.
(cobol.install-html, cobol.html, cobol.srchtml): Use
$(GCOBOL_HTML_FILES) instead of gcobol.html gcobol-io.html.
(gcobol.html): Rename goal to ...
($(build_htmldir)/gcobol/gcobol.html): ... this.  Run mkinstalldirs.
(gcobol-io.html): Rename goal to ...
($(build_htmldir)/gcobol/gcobol-io.html): ... this.  Run 
mkinstalldirs.

Diff:
---
 gcc/cobol/Make-lang.in | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/cobol/Make-lang.in b/gcc/cobol/Make-lang.in
index 990d51a85788..d14190f07957 100644
--- a/gcc/cobol/Make-lang.in
+++ b/gcc/cobol/Make-lang.in
@@ -40,6 +40,8 @@ GCOBOL_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell 
echo gcobol|sed '$(
 GCOBC_INSTALL_NAME := $(shell echo gcobc|sed '$(program_transform_name)')
 GCOBC_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gcobc|sed 
'$(program_transform_name)')
 
+GCOBOL_HTML_FILES = $(addprefix $(build_htmldir)/gcobol/,gcobol.html 
gcobol-io.html)
+
 cobol: cobol1$(exeext)
 cobol.serial = cobol1$(exeext)
 .PHONY: cobol
@@ -303,8 +305,8 @@ cobol.install-pdf: installdirs gcobol.pdf gcobol-io.pdf
 
 cobol.install-plugin:
 
-cobol.install-html: installdirs gcobol.html gcobol-io.html
-   $(INSTALL_DATA) gcobol.html gcobol-io.html $(DESTDIR)$(htmldir)/
+cobol.install-html: installdirs $(GCOBOL_HTML_FILES)
+   $(INSTALL_DATA) $(GCOBOL_HTML_FILES) $(DESTDIR)$(htmldir)/
 
 cobol.info:
 cobol.srcinfo:
@@ -323,14 +325,16 @@ gcobol-io.pdf: $(srcdir)/cobol/gcobol.3
groff -mdoc -T pdf  $^ > $@~
@mv $@~ $@
 
-cobol.html: gcobol.html gcobol-io.html
-cobol.srchtml: gcobol.html gcobol-io.html
+cobol.html: $(GCOBOL_HTML_FILES)
+cobol.srchtml: $(GCOBOL_HTML_FILES)
ln $^ $(srcdir)/cobol/
 
-gcobol.html: $(srcdir)/cobol/gcobol.1
+$(build_htmldir)/gcobol/gcobol.html: $(srcdir)/cobol/gcobol.1
+   $(mkinstalldirs) $(build_htmldir)/gcobol
mandoc -T html $^ > $@~
@mv $@~ $@
-gcobol-io.html: $(srcdir)/cobol/gcobol.3
+$(build_htmldir)/gcobol/gcobol-io.html: $(srcdir)/cobol/gcobol.3
+   $(mkinstalldirs) $(build_htmldir)/gcobol
mandoc -T html $^ > $@~
@mv $@~ $@


[gcc(refs/users/alfierichards/heads/fmv_c)] Add x86 FMV symbol tests

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:5c3d8e4a4e73b4dd32b3c03334cd1d97b9bda9db

commit 5c3d8e4a4e73b4dd32b3c03334cd1d97b9bda9db
Author: Alice Carlotti 
Date:   Tue Jan 28 15:17:33 2025 +0000

Add x86 FMV symbol tests

This is for testing the x86 mangling of FMV versioned function
assembly names.

gcc/testsuite/ChangeLog:

* g++.target/i386/mv-symbols1.C: New test.
* g++.target/i386/mv-symbols2.C: New test.
* g++.target/i386/mv-symbols3.C: New test.
* g++.target/i386/mv-symbols4.C: New test.
* g++.target/i386/mv-symbols5.C: New test.
* g++.target/i386/mvc-symbols1.C: New test.
* g++.target/i386/mvc-symbols2.C: New test.
* g++.target/i386/mvc-symbols3.C: New test.
* g++.target/i386/mvc-symbols4.C: New test.

Co-authored-by: Alfie Richards 

Diff:
---
 gcc/testsuite/g++.target/i386/mv-symbols1.C  | 68 
 gcc/testsuite/g++.target/i386/mv-symbols2.C  | 56 +++
 gcc/testsuite/g++.target/i386/mv-symbols3.C  | 44 ++
 gcc/testsuite/g++.target/i386/mv-symbols4.C  | 50 
 gcc/testsuite/g++.target/i386/mv-symbols5.C  | 56 +++
 gcc/testsuite/g++.target/i386/mvc-symbols1.C | 44 ++
 gcc/testsuite/g++.target/i386/mvc-symbols2.C | 29 
 gcc/testsuite/g++.target/i386/mvc-symbols3.C | 35 ++
 gcc/testsuite/g++.target/i386/mvc-symbols4.C | 23 ++
 9 files changed, 405 insertions(+)

diff --git a/gcc/testsuite/g++.target/i386/mv-symbols1.C 
b/gcc/testsuite/g++.target/i386/mv-symbols1.C
new file mode 100644
index 000000000000..1290299aea5e
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/mv-symbols1.C
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+
+__attribute__((target("default")))
+int foo ()
+{
+  return 1;
+}
+
+__attribute__((target("arch=slm")))
+int foo ()
+{
+  return 3;
+}
+
+__attribute__((target("sse4.2")))
+int foo ()
+{
+  return 5;
+}
+
+__attribute__((target("sse4.2")))
+int foo (int)
+{
+  return 6;
+}
+
+__attribute__((target("arch=slm")))
+int foo (int)
+{
+  return 4;
+}
+
+__attribute__((target("default")))
+int foo (int)
+{
+  return 2;
+}
+
+int bar()
+{
+  return foo ();
+}
+
+int bar(int x)
+{
+  return foo (x);
+}
+
+/* When updating any of the symbol names in these tests, make sure to also
+   update any tests for their absence in mvc-symbolsN.C */
+
+/* { dg-final { scan-assembler-times "\n_Z3foov:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.arch_slm:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.sse4.2:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\tcall\t_Z7_Z3foovv\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z7_Z3foovv, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times 
"\n\t\.set\t_Z7_Z3foovv,_Z3foov\.resolver\n" 1 } } */
+
+/* { dg-final { scan-assembler-times "\n_Z3fooi:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.arch_slm:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.sse4.2:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\tcall\t_Z7_Z3fooii\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z7_Z3fooii, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times 
"\n\t\.set\t_Z7_Z3fooii,_Z3fooi\.resolver\n" 1 } } */
diff --git a/gcc/testsuite/g++.target/i386/mv-symbols2.C 
b/gcc/testsuite/g++.target/i386/mv-symbols2.C
new file mode 100644
index 000000000000..8b75565d78d0
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/mv-symbols2.C
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+
+__attribute__((target("default")))
+int foo ()
+{
+  return 1;
+}
+
+__attribute__((target("arch=slm")))
+int foo ()
+{
+  return 3;
+}
+
+__attribute__((target("sse4.2")))
+int foo ()
+{
+  return 5;
+}
+
+__attribute__((target("sse4.2")))
+int foo (int)
+{
+  return 6;
+}
+
+__attribute__((target("arch=slm")))
+int foo (int)
+{
+  return 4;
+}
+
+__attribute__((target("default")))
+int foo (int)
+{
+  return 2;
+}
+
+/* When updating any of the symbol names in these tests, make sure to also
+   update any tests for their absence in mvc-symbolsN.C */
+
+/* { dg-final { scan-assembler-times "\n_Z3foov:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.arch_slm:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.sse4.2:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.resolver:\n" 0 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z7_Z3foovv, 
@gnu_indirect_function\n" 0 } } */
+/* { dg-final { scan-assembler-times 
"\n\t\.set\t_Z7_Z3foovv,_Z3foov\.resolver\n" 0 } } */
+
+/* { dg-final { scan-assembler-

[gcc] Created branch 'alfierichards/heads/fmv_c' in namespace 'refs/users'

2025-04-07 Thread Alfie Richards via Gcc-cvs
The branch 'alfierichards/heads/fmv_c' was created in namespace 'refs/users' 
pointing to:

 2061ee1fbd17... c: Improve diagnostics for C FMV declaration conflicts.


[gcc(refs/users/alfierichards/heads/fmv_c)] Add PowerPC FMV symbol tests.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:d5b12f31d02063d4d7c8051887b44d6b6bc62ef7

commit d5b12f31d02063d4d7c8051887b44d6b6bc62ef7
Author: Alice Carlotti 
Date:   Tue Jan 28 15:16:31 2025 +0000

Add PowerPC FMV symbol tests.

This tests the mangling of function assembly names when annotated with
target_clones attributes.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/mvc-symbols1.C: New test.
* g++.target/powerpc/mvc-symbols2.C: New test.
* g++.target/powerpc/mvc-symbols3.C: New test.
* g++.target/powerpc/mvc-symbols4.C: New test.

Co-authored-by: Alfie Richards 

Diff:
---
 gcc/testsuite/g++.target/powerpc/mvc-symbols1.C | 47 +
 gcc/testsuite/g++.target/powerpc/mvc-symbols2.C | 35 ++
 gcc/testsuite/g++.target/powerpc/mvc-symbols3.C | 41 +
 gcc/testsuite/g++.target/powerpc/mvc-symbols4.C | 29 +++
 4 files changed, 152 insertions(+)

diff --git a/gcc/testsuite/g++.target/powerpc/mvc-symbols1.C 
b/gcc/testsuite/g++.target/powerpc/mvc-symbols1.C
new file mode 100644
index 000000000000..9424382bf141
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/mvc-symbols1.C
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+
+__attribute__((target_clones("default", "cpu=power6", "cpu=power6x")))
+int foo ()
+{
+  return 1;
+}
+
+__attribute__((target_clones("cpu=power6x", "cpu=power6", "default")))
+int foo (int)
+{
+  return 2;
+}
+
+int bar()
+{
+  return foo ();
+}
+
+int bar(int x)
+{
+  return foo (x);
+}
+
+/* { dg-final { scan-assembler-times "\n_Z3foov\.default:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.cpu_power6:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.cpu_power6x:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\tbl _Z3foov\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z3foov, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.set\t_Z3foov,_Z3foov\.resolver\n" 
1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.default\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.cpu_power6\n" 1 } } 
*/
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.cpu_power6x\n" 0 } 
} */
+
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.default:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.cpu_power6:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.cpu_power6x:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\tbl _Z3fooi\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z3fooi, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.set\t_Z3fooi,_Z3fooi\.resolver\n" 
1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3fooi\.default\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3fooi\.cpu_power6\n" 0 } } 
*/
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3fooi\.cpu_power6x\n" 1 } 
} */
diff --git a/gcc/testsuite/g++.target/powerpc/mvc-symbols2.C 
b/gcc/testsuite/g++.target/powerpc/mvc-symbols2.C
new file mode 100644
index ..edf54480efd1
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/mvc-symbols2.C
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+
+__attribute__((target_clones("default", "cpu=power6", "cpu=power6x")))
+int foo ()
+{
+  return 1;
+}
+
+__attribute__((target_clones("cpu=power6x", "cpu=power6", "default")))
+int foo (int)
+{
+  return 2;
+}
+
+/* { dg-final { scan-assembler-times "\n_Z3foov\.default:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.cpu_power6:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.cpu_power6x:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3foov\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z3foov, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.set\t_Z3foov,_Z3foov\.resolver\n" 
1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.default\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.cpu_power6\n" 1 } } 
*/
+/* { dg-final { scan-assembler-times "\n\t\.quad\t_Z3foov\.cpu_power6x\n" 0 } 
} */
+
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.default:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.cpu_power6:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.cpu_power6x:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n_Z3fooi\.resolver:\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.type\t_Z3fooi, 
@gnu_indirect_function\n" 1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.set\t_Z3fooi,_Z3fooi\.resolver\n" 
1 } } */
+/* { dg-final { scan-assembler-times "\n\t\.

[gcc(refs/users/alfierichards/heads/fmv_c)] Change function versions to be implicitly ordered.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:ef26faa3ebf69727e6f73b430d935e390f9cb577

commit ef26faa3ebf69727e6f73b430d935e390f9cb577
Author: Alfie Richards 
Date:   Thu Mar 27 14:12:06 2025 +

Change function versions to be implicitly ordered.

This changes function version structures to maintain the default version
as the first declaration in the linked data structures by giving priority
to the set containing the default when constructing the structure.

This allows for removing logic for moving the default to the first
position which was duplicated across target specific code and enables
easier reasoning about function sets when checking for a default.

gcc/ChangeLog:

* cgraph.cc (cgraph_node::record_function_versions): Refactor and
rename to...
(cgraph_node::add_function_version): new function.
* cgraph.h (cgraph_node::record_function_versions): Refactor and
rename to...
(cgraph_node::add_function_version): new function.
* config/aarch64/aarch64.cc 
(aarch64_get_function_versions_dispatcher):
Remove reordering.
* config/i386/i386-features.cc 
(ix86_get_function_versions_dispatcher):
Remove reordering.
* config/riscv/riscv.cc (riscv_get_function_versions_dispatcher):
Remove reordering.
* config/rs6000/rs6000.cc (rs6000_get_function_versions_dispatcher):
Remove reordering.

gcc/cp/ChangeLog:

* decl.cc (maybe_version_functions): Change record_function_versions
call to add_function_version.

Diff:
---
 gcc/cgraph.cc| 50 ++--
 gcc/cgraph.h |  6 ++---
 gcc/config/aarch64/aarch64.cc| 37 +++--
 gcc/config/i386/i386-features.cc | 33 --
 gcc/config/riscv/riscv.cc| 38 +-
 gcc/config/rs6000/rs6000.cc  | 35 +---
 gcc/cp/decl.cc   |  8 ++-
 7 files changed, 63 insertions(+), 144 deletions(-)

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index 6ae6a97f6f56..a2ad2516c12b 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -231,43 +231,49 @@ cgraph_node::delete_function_version_by_decl (tree decl)
   decl_node->remove ();
 }
 
-/* Record that DECL1 and DECL2 are semantically identical function
-   versions.  */
+/* Add decl to the structure of semantically identical function versions..  */
 void
-cgraph_node::record_function_versions (tree decl1, tree decl2)
+cgraph_node::add_function_version (cgraph_function_version_info *fn_v,
+  tree decl)
 {
-  cgraph_node *decl1_node = cgraph_node::get_create (decl1);
-  cgraph_node *decl2_node = cgraph_node::get_create (decl2);
-  cgraph_function_version_info *decl1_v = NULL;
-  cgraph_function_version_info *decl2_v = NULL;
+  cgraph_node *decl_node = cgraph_node::get_create (decl);
+  cgraph_function_version_info *decl_v = NULL;
+
   cgraph_function_version_info *before;
   cgraph_function_version_info *after;
 
-  gcc_assert (decl1_node != NULL && decl2_node != NULL);
-  decl1_v = decl1_node->function_version ();
-  decl2_v = decl2_node->function_version ();
+  gcc_assert (decl_node != NULL);
+
+  decl_v = decl_node->function_version ();
 
-  if (decl1_v != NULL && decl2_v != NULL)
+  /* If the nodes are already linked, skip.  */
+  if (decl_v != NULL && (decl_v->next || decl_v->prev))
 return;
 
-  if (decl1_v == NULL)
-decl1_v = decl1_node->insert_new_function_version ();
+  if (decl_v == NULL)
+decl_v = decl_node->insert_new_function_version ();
 
-  if (decl2_v == NULL)
-decl2_v = decl2_node->insert_new_function_version ();
+  gcc_assert (decl_v);
+  gcc_assert (fn_v);
 
-  /* Chain decl2_v and decl1_v.  All semantically identical versions
- will be chained together.  */
+  before = fn_v;
+  after = decl_v;
 
-  before = decl1_v;
-  after = decl2_v;
+  /* Go to the beginning of both nodes (as after is on its own we just need to
+ this for before).  */
+  while (before->prev != NULL)
+before = before->prev;
 
+  /* Potentially swap the nodes to maintain the default always being in the
+ first position.  */
+  if (is_function_default_version (decl))
+std::swap (before, after);
+
+  /* Go to last node of before.  */
   while (before->next != NULL)
 before = before->next;
 
-  while (after->prev != NULL)
-after= after->prev;
-
+  /* Chain decl2_v and decl1_v.  */
   before->next = after;
   after->prev = before;
 }
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 065fcc742e8b..6759505bf338 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1319,9 +1319,9 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
public symtab_node
 return m_summary_id;
   }
 
-  /* Record that DECL1 and DECL2 are semantically identical function
- versions.  */
-  static void record_f

[gcc(refs/users/alfierichards/heads/fmv_c)] Add string_slice class.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:aa80e36976e1a5d6e51bcad77ccb27fcd6e31874

commit aa80e36976e1a5d6e51bcad77ccb27fcd6e31874
Author: Alfie Richards 
Date:   Mon Mar 24 10:45:56 2025 +

Add string_slice class.

The string_slice inherits from array_slice and is used to refer to a
substring of an array that is memory managed elsewhere without modifying
the underlying array.

For example, this is useful in cases such as when needing to refer to a
substring of an attribute in the syntax tree.

This commit also adds some minimal helper functions for string_slice,
such as a strtok alternative, equality operators, strcmp, and a function
to strip whitespace from the beginning and end of a string_slice.

gcc/c-family/ChangeLog:

* c-format.cc (local_string_slice_node): New node type.
(asm_fprintf_char_table): New entry.
(init_dynamic_diag_info): Add support for string_slice.
* c-format.h (T_STRING_SLICE): New node type.

gcc/ChangeLog:

* pretty-print.cc (format_phase_2): Add support for string_slice.
* vec.cc (string_slice::tokenize): New method.
(strcmp): New implementation for string_slice.
(string_slice::strip): New method.
(test_string_slice_initializers): New test.
(test_string_slice_tokenize): Ditto.
(test_string_slice_strcmp): Ditto.
(test_string_slice_equality): Ditto.
(test_string_slice_inequality): Ditto.
(test_string_slice_invalid): Ditto.
(test_string_slice_strip): Ditto.
(vec_cc_tests): Add new tests.
* vec.h (class string_slice): New class.
(strcmp): New implementation for string_slice.

Diff:
---
 gcc/c-family/c-format.cc |   9 +++
 gcc/c-family/c-format.h  |   1 +
 gcc/pretty-print.cc  |  10 +++
 gcc/vec.cc   | 207 +++
 gcc/vec.h|  45 +++
 5 files changed, 272 insertions(+)

diff --git a/gcc/c-family/c-format.cc b/gcc/c-family/c-format.cc
index 211d20dd25bf..8b4447f9fdc8 100644
--- a/gcc/c-family/c-format.cc
+++ b/gcc/c-family/c-format.cc
@@ -70,6 +70,7 @@ static GTY(()) tree local_event_ptr_node;
 static GTY(()) tree local_pp_element_ptr_node;
 static GTY(()) tree local_gimple_ptr_node;
 static GTY(()) tree local_cgraph_node_ptr_node;
+static GTY(()) tree local_string_slice_node;
 static GTY(()) tree locus;
 
 static bool decode_format_attr (const_tree, tree, tree, function_format_info *,
@@ -770,6 +771,7 @@ static const format_char_info asm_fprintf_char_table[] =
   { "p",   1, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q",  "c",  NULL }, \
   { "r",   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "","//cR",   NULL 
}, \
   { "@",   1, STD_C89, { T_EVENT_PTR,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL }, \
+  { "B",   1, STD_C89, { T_STRING_SLICE,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q", "",   NULL }, \
   { "e",   1, STD_C89, { T_PP_ELEMENT_PTR,   BADLEN,  BADLEN,  BADLEN,  
BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"", NULL }, \
   { "<",   0, STD_C89, NOARGUMENTS, "",  "<",   NULL }, \
   { ">",   0, STD_C89, NOARGUMENTS, "",  ">",   NULL }, \
@@ -5211,6 +5213,13 @@ init_dynamic_diag_info (void)
   || local_cgraph_node_ptr_node == void_type_node)
 local_cgraph_node_ptr_node = get_named_type ("cgraph_node");
 
+  /* Similar to the above but for string_slice*.  */
+  if (!local_string_slice_node
+  || local_string_slice_node == void_type_node)
+{
+  local_string_slice_node = get_named_type ("string_slice");
+}
+
   /* Similar to the above but for diagnostic_event_id_t*.  */
   if (!local_event_ptr_node
   || local_event_ptr_node == void_type_node)
diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h
index 323338cb8e7f..d44d3862d830 100644
--- a/gcc/c-family/c-format.h
+++ b/gcc/c-family/c-format.h
@@ -317,6 +317,7 @@ struct format_kind_info
 #define T89_G   { STD_C89, NULL, &local_gimple_ptr_node }
 #define T_CGRAPH_NODE   { STD_C89, NULL, &local_cgraph_node_ptr_node }
 #define T_EVENT_PTR{ STD_C89, NULL, &local_event_ptr_node }
+#define T_STRING_SLICE{ STD_C89, NULL, &local_string_slice_node }
 #define T_PP_ELEMENT_PTR{ STD_C89, NULL, &local_pp_element_ptr_node }
 #define T89_T   { STD_C89, NULL, &local_tree_type_node }
 #define T89_V  { STD_C89, NULL, T_V }
diff --git a/gcc/pretty-print.cc b/gcc/pretty-print.cc
index 79c7bc2b6625..280cde30adcb 100644
--- a/gcc/pretty-print.cc
+++ b/gcc/pretty-print.cc
@@ -2035,6 +2035,16 @@ format_phase_2 (pretty_printer *pp,
pp_string (pp, va_arg (*text.m_args_ptr, const cha

[gcc(refs/users/alfierichards/heads/fmv_c)] Add dispatcher_resolver_function and is_target_clone to cgraph_node.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:4936254aa6111db3e85e07da4894b6950a7d1b76

commit 4936254aa6111db3e85e07da4894b6950a7d1b76
Author: Alfie Richards 
Date:   Fri Jan 31 11:46:08 2025 +

Add dispatcher_resolver_function and is_target_clone to cgraph_node.

These flags are used to make sure mangling is done correctly.

gcc/ChangeLog:

* cgraph.h (struct cgraph_node): Add dispatcher_resolver_function 
and
is_target_clone.

Diff:
---
 gcc/cgraph.h | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 4a4fb7302b19..91e5de30f98c 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -907,7 +907,9 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public 
symtab_node
   used_as_abstract_origin (false),
   lowered (false), process (false), frequency (NODE_FREQUENCY_NORMAL),
   only_called_at_startup (false), only_called_at_exit (false),
-  tm_clone (false), dispatcher_function (false), calls_comdat_local 
(false),
+  tm_clone (false), dispatcher_function (false),
+  dispatcher_resolver_function (false), is_target_clone (false),
+  calls_comdat_local (false),
   icf_merged (false), nonfreeing_fn (false), merged_comdat (false),
   merged_extern_inline (false), parallelized_function (false),
   split_part (false), indirect_call_target (false), local (false),
@@ -1465,6 +1467,11 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
public symtab_node
   unsigned tm_clone : 1;
   /* True if this decl is a dispatcher for function versions.  */
   unsigned dispatcher_function : 1;
+  /* True if this decl is a resolver for function versions.  */
+  unsigned dispatcher_resolver_function : 1;
+  /* True this is part of a multiversioned set and the default version
+ comes from a target_clone attribute.  */
+  unsigned is_target_clone : 1;
   /* True if this decl calls a COMDAT-local function.  This is set up in
  compute_fn_summary and inline_call.  */
   unsigned calls_comdat_local : 1;


[gcc(refs/users/alfierichards/heads/fmv_c)] Add assembler_name to cgraph_function_version_info.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:376346bceddbd874cb7a0c02fbe2feb77cecdce5

commit 376346bceddbd874cb7a0c02fbe2feb77cecdce5
Author: Alfie Richards 
Date:   Wed Feb 5 12:56:52 2025 +

Add assembler_name to cgraph_function_version_info.

This adds the assembler_name member to cgraph_function_version_info
to store the base assembler name for the function to be mangled. This is
used in later patches for refactoring FMV mangling.

gcc/ChangeLog:

* cgraph.cc (cgraph_node::insert_new_function_version): Record
assembler_name.
* cgraph.h (struct cgraph_function_version_info): Add 
assembler_name.

Diff:
---
 gcc/cgraph.cc | 1 +
 gcc/cgraph.h  | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index a2ad2516c12b..e81c7262d36a 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -187,6 +187,7 @@ cgraph_node::insert_new_function_version (void)
   version_info_node = NULL;
   version_info_node = ggc_cleared_alloc ();
   version_info_node->this_node = this;
+  version_info_node->assembler_name = DECL_ASSEMBLER_NAME (this->decl);
 
   if (cgraph_fnver_htab == NULL)
 cgraph_fnver_htab = hash_table::create_ggc (2);
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 6759505bf338..4a4fb7302b19 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -856,6 +856,9 @@ struct GTY((for_user)) cgraph_function_version_info {
  dispatcher. The dispatcher decl is an alias to the resolver
  function decl.  */
   tree dispatcher_resolver;
+
+  /* The assmbly name of the function set before version mangling.  */
+  tree assembler_name;
 };
 
 #define DEFCIFCODE(code, type, string) CIF_ ## code,


[gcc(refs/users/alfierichards/heads/fmv_c)] Remove unnecessary `record` argument from maybe_version_functions.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:5cb80d373a338aec848850d90d12063ff5a704f9

commit 5cb80d373a338aec848850d90d12063ff5a704f9
Author: Alfie Richards 
Date:   Tue Jan 28 15:42:49 2025 +

Remove unnecessary `record` argument from maybe_version_functions.

Previously, the `record` argument in maybe_version_functions allowed the
call to cgraph_node::record_function_versions to be skipped.  However,
this was only skipped when both decls were already marked as versioned,
in which case we trigger the early exit in record_function_versions
instead. Therefore, the argument is unnecessary.

gcc/cp/ChangeLog:

* class.cc (add_method): Remove argument.
* cp-tree.h (maybe_version_functions): Ditto.
* decl.cc (decls_match): Ditto.
(maybe_version_functions): Ditto.

Diff:
---
 gcc/cp/class.cc  | 2 +-
 gcc/cp/cp-tree.h | 2 +-
 gcc/cp/decl.cc   | 9 +++--
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index 2b694b98e565..93f1a1bdd81b 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -1402,7 +1402,7 @@ add_method (tree type, tree method, bool via_using)
   /* If these are versions of the same function, process and
 move on.  */
   if (TREE_CODE (fn) == FUNCTION_DECL
- && maybe_version_functions (method, fn, true))
+ && maybe_version_functions (method, fn))
continue;
 
   if (DECL_INHERITED_CTOR (method))
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 927f51b116b3..5bfc08191f6d 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7122,7 +7122,7 @@ extern void determine_local_discriminator (tree, tree = 
NULL_TREE);
 extern bool member_like_constrained_friend_p   (tree);
 extern bool fns_correspond (tree, tree);
 extern int decls_match (tree, tree, bool = true);
-extern bool maybe_version_functions(tree, tree, bool);
+extern bool maybe_version_functions(tree, tree);
 extern bool validate_constexpr_redeclaration   (tree, tree);
 extern bool merge_default_template_args(tree, tree, bool);
 extern tree duplicate_decls(tree, tree,
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 2ed94fd786ce..85c1b63c1f54 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -1216,9 +1216,7 @@ decls_match (tree newdecl, tree olddecl, bool 
record_versions /* = true */)
  && targetm.target_option.function_versions (newdecl, olddecl))
{
  if (record_versions)
-   maybe_version_functions (newdecl, olddecl,
-(!DECL_FUNCTION_VERSIONED (newdecl)
- || !DECL_FUNCTION_VERSIONED (olddecl)));
+   maybe_version_functions (newdecl, olddecl);
  return 0;
}
 }
@@ -1289,7 +1287,7 @@ maybe_mark_function_versioned (tree decl)
If RECORD is set to true, record function versions.  */
 
 bool
-maybe_version_functions (tree newdecl, tree olddecl, bool record)
+maybe_version_functions (tree newdecl, tree olddecl)
 {
   if (!targetm.target_option.function_versions (newdecl, olddecl))
 return false;
@@ -1312,8 +1310,7 @@ maybe_version_functions (tree newdecl, tree olddecl, bool 
record)
   maybe_mark_function_versioned (newdecl);
 }
 
-  if (record)
-cgraph_node::record_function_versions (olddecl, newdecl);
+  cgraph_node::record_function_versions (olddecl, newdecl);
 
   return true;
 }


[gcc(refs/users/alfierichards/heads/fmv_c)] Change make_attribute to take string_slice.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:591e01d35957ab3e2a8082e0f2ef6e855683aa16

commit 591e01d35957ab3e2a8082e0f2ef6e855683aa16
Author: Alfie Richards 
Date:   Fri Jan 31 10:49:42 2025 +

Change make_attribute to take string_slice.

gcc/ChangeLog:

* attribs.cc (make_attribute): Change arguments.
* attribs.h (make_attribute): Change arguments.

Diff:
---
 gcc/attribs.cc | 18 ++
 gcc/attribs.h  |  2 +-
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index f6667839c013..37d6ce0f9161 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -1072,25 +1072,19 @@ apply_tm_attr (tree fndecl, tree attr)
   decl_attributes (&TREE_TYPE (fndecl), tree_cons (attr, NULL, NULL), 0);
 }
 
-/* Makes a function attribute of the form NAME(ARG_NAME) and chains
+/* Makes a function attribute of the form NAME (ARG_NAME) and chains
it to CHAIN.  */
 
 tree
-make_attribute (const char *name, const char *arg_name, tree chain)
+make_attribute (string_slice name, string_slice arg_name, tree chain)
 {
-  tree attr_name;
-  tree attr_arg_name;
-  tree attr_args;
-  tree attr;
-
-  attr_name = get_identifier (name);
-  attr_arg_name = build_string (strlen (arg_name), arg_name);
-  attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
-  attr = tree_cons (attr_name, attr_args, chain);
+  tree attr_name = get_identifier_with_length (name.begin (), name.size ());
+  tree attr_arg_name = build_string (arg_name.size (), arg_name.begin ());
+  tree attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
+  tree attr = tree_cons (attr_name, attr_args, chain);
   return attr;
 }
 
-
 /* Common functions used for target clone support.  */
 
 /* Comparator function to be used in qsort routine to sort attribute
diff --git a/gcc/attribs.h b/gcc/attribs.h
index 4b946390f76b..b8b6838599cc 100644
--- a/gcc/attribs.h
+++ b/gcc/attribs.h
@@ -45,7 +45,7 @@ extern bool cxx11_attribute_p (const_tree);
 extern tree get_attribute_name (const_tree);
 extern tree get_attribute_namespace (const_tree);
 extern void apply_tm_attr (tree, tree);
-extern tree make_attribute (const char *, const char *, tree);
+extern tree make_attribute (string_slice, string_slice, tree);
 extern bool attribute_ignored_p (tree);
 extern bool attribute_ignored_p (const attribute_spec *const);
 extern bool any_nonignored_attribute_p (tree);


[gcc(refs/users/alfierichards/heads/fmv_c)] Refactor FMV frontend hooks and logic.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:564bd7c0b472d5ce1d112c6111dedc1a480bb3f1

commit 564bd7c0b472d5ce1d112c6111dedc1a480bb3f1
Author: Alfie Richards 
Date:   Mon Mar 24 13:20:01 2025 +

Refactor FMV frontend hooks and logic.

This change refactors FMV handling in the frontend to allow greater
reasoning about versions in shared code.

This is needed for target_version semantics and for allowing target_clones
and target_versions to both be used for the declaration.  There are now
two questions that need to be answered for the front end.

1. Are these two declarations completely distinct FMV declarations
(ie. the versions they define have no overlap). If so, they don't match.
2. Are these two declarations matching and therefore mergeable.
(ie. two target_clone decls that define the same set of versions, or
an un-annotated declaration, and a target_clones definition containing the
default version). If so, the existing merging logic should be used to
try to merge these and diagnose if it's not possible. If not, then this
needs to be diagnosed.

To do this the common_function_versions function has been renamed
distinct_version_decls (meaning, are the versions defined by these
two functions completely distinct from each other).

The common function version hook was changed to instead take two
string_slice's and determine if they define the same version.

There is a new function, called mergeable_version_decls which checks
if two decls (which define overlapping versions) can be merged.
For example, if they are two target_clone decls which define the exact
same set of versions.

This change also records the conflicting version so that it can be
included in diagnostics.

gcc/ChangeLog:

* attribs.cc (attr_strcmp): Moved to target specific code.
(sorted_attr_string): Moved to target specific code.
(common_function_versions): New function.
* attribs.h (sorted_attr_string): Removed.
(common_function_versions): New function.
* config/aarch64/aarch64.cc (aarch64_common_function_versions):
New function.
* config/riscv/riscv.cc (riscv_common_function_versions): New 
function.
* doc/tm.texi: Regenerated.
* target.def: Change common_function_versions hook.
* tree.cc (distinct_version_decls): New function.
(mergeable_version_decls): Ditto.
* tree.h (distinct_version_decls): New function.
(mergeable_version_decls): Ditto.

gcc/cp/ChangeLog:

* class.cc (resolve_address_of_overloaded_function): Updated to use
distinct_version_decls instead of common_function_version hook.
* cp-tree.h (decls_match): Updated to use
distinct_version_decls instead of common_function_version hook.
* decl.cc (decls_match): Refactor to use distinct_version_decls and
to pass through conflicting_version argument.
(maybe_version_functions): Updated to use
distinct_version_decls instead of common_function_version hook.
(duplicate_decls): Add logic to handle conflicting unmergeable decls
and improve diagnostics for conflicting versions.
* decl2.cc (check_classfn): Updated to use
distinct_version_decls instead of common_function_version hook.

Diff:
---
 gcc/attribs.cc|  75 ++--
 gcc/attribs.h |   3 +-
 gcc/config/aarch64/aarch64.cc |  16 ++--
 gcc/config/riscv/riscv.cc |  30 ---
 gcc/cp/class.cc   |   4 +-
 gcc/cp/cp-tree.h  |   2 +-
 gcc/cp/decl.cc|  43 +++--
 gcc/cp/decl2.cc   |   2 +-
 gcc/doc/tm.texi   |   4 +-
 gcc/target.def|   6 +-
 gcc/tree.cc   | 204 ++
 gcc/tree.h|   6 ++
 12 files changed, 291 insertions(+), 104 deletions(-)

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index 80833388ff2e..04a9e743dbe0 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -1086,7 +1086,14 @@ make_attribute (string_slice name, string_slice 
arg_name, tree chain)
   return attr;
 }
 
-/* Common functions used for target clone support.  */
+/* Used for targets with target_version semantics.  */
+
+bool
+common_function_versions (string_slice fn1 ATTRIBUTE_UNUSED,
+ string_slice fn2 ATTRIBUTE_UNUSED)
+{
+  gcc_unreachable ();
+}
 
 /* Comparator function to be used in qsort routine to sort attribute
specification strings to "target".  */
@@ -1176,72 +1183,6 @@ sorted_attr_string (tree arglist)
   XDELETEVEC (attr_str);
   return ret_str;
 }
-
-
-/* This function returns true if FN1 and FN2 are versions of the same function,
-   that is, the target strings of the function decls are differen

[gcc(refs/users/alfierichards/heads/fmv_c)] Add error cases and tests for Aarch64 FMV.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:f54b7da000f2d3fe1211957fb2e33a87882461cc

commit f54b7da000f2d3fe1211957fb2e33a87882461cc
Author: Alfie Richards 
Date:   Thu Feb 13 15:59:43 2025 +

Add error cases and tests for Aarch64 FMV.

This changes the ambiguation error for C++ to cover cases of differently
annotated FMV function sets whose signatures only differ by their return
type.

It also adds tests covering many FMV errors for Aarch64, including
redeclaration, and mixing target_clones and target_versions.

gcc/cp/ChangeLog:
PR c++/119498
* decl.cc (duplicate_decls): Change logic to not always exclude FMV
annotated functions in cases of return type non-ambiguation.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-and-mvc-error1.C: New test.
* g++.target/aarch64/mv-and-mvc-error2.C: New test.
* g++.target/aarch64/mv-and-mvc-error3.C: New test.
* g++.target/aarch64/mv-error1.C: New test.
* g++.target/aarch64/mv-error2.C: New test.
* g++.target/aarch64/mv-error3.C: New test.
* g++.target/aarch64/mv-error4.C: New test.
* g++.target/aarch64/mv-error5.C: New test.
* g++.target/aarch64/mv-error6.C: New test.
* g++.target/aarch64/mv-error7.C: New test.
* g++.target/aarch64/mv-error8.C: New test.
* g++.target/aarch64/mvc-error1.C: New test.
* g++.target/aarch64/mvc-error2.C: New test.
* g++.target/aarch64/mvc-warning1.C: Modified test.

Diff:
---
 gcc/cp/decl.cc  |  7 +--
 .../g++.target/aarch64/mv-and-mvc-error1.C  | 10 ++
 .../g++.target/aarch64/mv-and-mvc-error2.C  | 10 ++
 .../g++.target/aarch64/mv-and-mvc-error3.C  |  9 +
 gcc/testsuite/g++.target/aarch64/mv-error1.C| 19 +++
 gcc/testsuite/g++.target/aarch64/mv-error2.C| 10 ++
 gcc/testsuite/g++.target/aarch64/mv-error3.C| 13 +
 gcc/testsuite/g++.target/aarch64/mv-error4.C| 10 ++
 gcc/testsuite/g++.target/aarch64/mv-error5.C|  9 +
 gcc/testsuite/g++.target/aarch64/mv-error6.C| 21 +
 gcc/testsuite/g++.target/aarch64/mv-error7.C| 12 
 gcc/testsuite/g++.target/aarch64/mv-error8.C| 13 +
 gcc/testsuite/g++.target/aarch64/mvc-error1.C   | 10 ++
 gcc/testsuite/g++.target/aarch64/mvc-error2.C   | 10 ++
 gcc/testsuite/g++.target/aarch64/mvc-warning1.C | 12 ++--
 15 files changed, 171 insertions(+), 4 deletions(-)

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index c53a8ac4567a..c20af9bc322a 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -2022,8 +2022,11 @@ duplicate_decls (tree newdecl, tree olddecl, bool 
hiding, bool was_hidden)
}
  /* For function versions, params and types match, but they
 are not ambiguous.  */
- else if ((!DECL_FUNCTION_VERSIONED (newdecl)
-   && !DECL_FUNCTION_VERSIONED (olddecl))
+ else if (((!DECL_FUNCTION_VERSIONED (newdecl)
+&& !DECL_FUNCTION_VERSIONED (olddecl))
+   || !comptypes (TREE_TYPE (TREE_TYPE (newdecl)),
+  TREE_TYPE (TREE_TYPE (olddecl)),
+  COMPARE_STRICT))
   /* Let constrained hidden friends coexist for now, we'll
  check satisfaction later.  */
   && !member_like_constrained_friend_p (newdecl)
diff --git a/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error1.C 
b/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error1.C
new file mode 100644
index ..00d3826f757f
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error1.C
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+/* { dg-additional-options "-Wno-experimental-fmv-target" } */
+
+__attribute__ ((target_version ("dotprod"))) int
+foo () { return 3; } /* { dg-message "previous definition" } */
+
+__attribute__ ((target_clones ("dotprod", "sve"))) int
+foo () { return 1; } /* { dg-error "conflicting .dotprod. versions" } */
diff --git a/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error2.C 
b/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error2.C
new file mode 100644
index ..bf8a4112a216
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/mv-and-mvc-error2.C
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O0" } */
+/* { dg-additional-options "-Wno-experimental-fmv-target" } */
+
+__attribute__ ((target_version ("default"))) int
+foo () { return 1; } /* { dg-message "old declaration" } */
+
+__attribute__ ((target_clones ("dotprod", "sve"))) float
+foo () { return 3; } /* { dg-error "ambiguating new declaration of" } 

[gcc(refs/users/alfierichards/heads/fmv_c)] Refactor riscv target parsing to take string_slice.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:1c497dd6c089b53e388f847807029348b3620bc3

commit 1c497dd6c089b53e388f847807029348b3620bc3
Author: Alfie Richards 
Date:   Mon Mar 24 11:45:32 2025 +

Refactor riscv target parsing to take string_slice.

This is a quick refactor of the riscv target processing code
to take a string_slice rather than a decl.

The reason for this is to enable it to work with target_clones
where merging logic requires reasoning about each version string
individually in the front end.

This refactor primarily serves just to get this working. Ideally the
logic here would be further refactored, as currently there is no way to
check whether a parse fails without emitting an error.
This makes things difficult for later patches, which intend to emit a
warning and ignore unrecognised/not parsed target_clone values rather
than erroring, which can't currently be achieved with the current riscv
code.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_process_target_version_str): 
New function.
* config/riscv/riscv-target-attr.cc (riscv_process_target_attr): 
Refactor to take
string_slice.
(riscv_process_target_version_str): Ditto.
* config/riscv/riscv.cc (parse_features_for_version): Refactor to 
take
string_slice.
(riscv_compare_version_priority): Ditto.
(dispatch_function_versions): Change to pass location.

Diff:
---
 gcc/config/riscv/riscv-protos.h   |  2 ++
 gcc/config/riscv/riscv-target-attr.cc | 14 ++
 gcc/config/riscv/riscv.cc | 50 ++-
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2bedd878a04e..1efe45d63e6f 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -813,6 +813,8 @@ riscv_option_valid_attribute_p (tree, tree, tree, int);
 extern bool
 riscv_option_valid_version_attribute_p (tree, tree, tree, int);
 extern bool
+riscv_process_target_version_str (string_slice, location_t);
+extern bool
 riscv_process_target_version_attr (tree, location_t);
 extern void
 riscv_override_options_internal (struct gcc_options *);
diff --git a/gcc/config/riscv/riscv-target-attr.cc 
b/gcc/config/riscv/riscv-target-attr.cc
index 1d968655f95d..d3f06fb15d42 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -354,11 +354,11 @@ num_occurrences_in_str (char c, char *str)
and update the global target options space.  */
 
 bool
-riscv_process_target_attr (const char *args,
+riscv_process_target_attr (string_slice args,
   location_t loc,
   const struct riscv_attribute_info *attrs)
 {
-  size_t len = strlen (args);
+  size_t len = args.size ();
 
   /* No need to emit warning or error on empty string here, generic code 
already
  handle this case.  */
@@ -369,7 +369,7 @@ riscv_process_target_attr (const char *args,
 
   std::unique_ptr buf (new char[len+1]);
   char *str_to_check = buf.get ();
-  strcpy (str_to_check, args);
+  strncpy (str_to_check, args.begin (), args.size ());
 
   /* Used to catch empty spaces between semi-colons i.e.
  attribute ((target ("attr1;;attr2"))).  */
@@ -391,8 +391,7 @@ riscv_process_target_attr (const char *args,
 
   if (num_attrs != num_semicolons + 1)
 {
-  error_at (loc, "malformed % attribute",
-   args);
+  error_at (loc, "malformed % attribute", &args);
   return false;
 }
 
@@ -513,6 +512,11 @@ riscv_process_target_version_attr (tree args, location_t 
loc)
   return riscv_process_target_attr (str, loc, riscv_target_version_attrs);
 }
 
+bool
+riscv_process_target_version_str (string_slice str, location_t loc)
+{
+  return riscv_process_target_attr (str, loc, riscv_target_version_attrs);
+}
 
 /* Implement TARGET_OPTION_VALID_VERSION_ATTRIBUTE_P.  This is used to
process attribute ((target_version ("..."))).  */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index baf2ea778210..723f8c1ebce6 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -13092,31 +13092,22 @@ riscv_c_mode_for_floating_type (enum tree_index ti)
   return default_mode_for_floating_type (ti);
 }
 
-/* This parses the attribute arguments to target_version in DECL and modifies
-   the feature mask and priority required to select those targets.  */
-static void
-parse_features_for_version (tree decl,
+/* This parses STR and modifies the feature mask and priority required to
+   select those targets.  */
+static bool
+parse_features_for_version (string_slice version_str,
+   location_t loc,
struct riscv_feature_bits &res,
int &priority)
 {
-  tree version_attr = lookup_attribute ("target_version",
-  

[gcc(refs/users/alfierichards/heads/fmv_c)] Add reject_target_clone hook in order to filter target_clone versions.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:f5e08ff26e1a5e6f2799f5828008eeec1edddc5d

commit f5e08ff26e1a5e6f2799f5828008eeec1edddc5d
Author: Alfie Richards 
Date:   Mon Mar 24 15:04:38 2025 +

Add reject_target_clone hook in order to filter target_clone versions.

This patch introduces the TARGET_REJECT_FUNCTION_CLONE_VERSION hook
which is used to determine if a target_clones version string parses.

If true is returned, a warning is emitted and from then on the version
is ignored.

This is as specified in the Arm C Language Extensions. The purpose of this
is to allow some portability of code using target_clones attributes.

Currently this is only properly implemented for the Aarch64 backend.

For riscv which is the only other backend which uses target_version
semantics a partial implementation is present, where this hook is used
to check parsing, in which errors will be emitted on a failed parse
rather than warnings. A refactor of the riscv parsing logic would be
required to enable this functionality fully.

Additionally, after refactoring the riscv logic, the location argument
to the hook would be unnecessary.

This also fixes PR 118339 where parse failures could cause ICE in Aarch64.

gcc/ChangeLog:

PR target/118339
* attribs.cc (reject_target_clone_version): New function.
* target.def: Add reject_target_clone_version hook.
* tree.cc (get_clone_attr_versions): Add filter and location 
argument.
(get_clone_versions): Update call to get_clone_attr_versions.
* tree.h (get_clone_attr_versions): Add filter and location 
argument.
* config/aarch64/aarch64.cc (aarch64_reject_target_clone_version):
New function
(TARGET_REJECT_FUNCTION_CLONE_VERSION):
New define.
* config/i386/i386.cc (TARGET_REJECT_FUNCTION_CLONE_VERSION):
New define.
* config/riscv/riscv.cc (riscv_reject_target_clone_version):
New function.
(TARGET_REJECT_FUNCTION_CLONE_VERSION):
New define.
* config/rs6000/rs6000.cc (TARGET_REJECT_FUNCTION_CLONE_VERSION):
New define.
* doc/tm.texi: Regenerated.
* doc/tm.texi.in: Add documentation for new hook.

gcc/c-family/ChangeLog:

* c-attribs.cc (handle_target_clones_attribute): Update to emit 
warnings
for rejected versions.

Diff:
---
 gcc/attribs.cc|  7 +++
 gcc/c-family/c-attribs.cc | 26 +-
 gcc/config/aarch64/aarch64.cc | 20 
 gcc/config/i386/i386.cc   |  3 +++
 gcc/config/riscv/riscv.cc | 18 ++
 gcc/config/rs6000/rs6000.cc   |  3 +++
 gcc/doc/tm.texi   |  5 +
 gcc/doc/tm.texi.in|  2 ++
 gcc/target.def|  8 
 gcc/tree.cc   | 12 ++--
 gcc/tree.h|  8 ++--
 11 files changed, 103 insertions(+), 9 deletions(-)

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index 09c4db96531d..80833388ff2e 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -1242,6 +1242,13 @@ common_function_versions (tree fn1, tree fn2)
   return result;
 }
 
+bool
+reject_target_clone_version (string_slice str ATTRIBUTE_UNUSED,
+location_t loc ATTRIBUTE_UNUSED)
+{
+  return false;
+}
+
 /* Make a dispatcher declaration for the multi-versioned function DECL.
Calls to DECL function will be replaced with calls to the dispatcher
by the front-end.  Return the decl created.  */
diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index 5dff489fccae..b5287f0da06d 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -6132,12 +6132,28 @@ handle_target_clones_attribute (tree *node, tree name, 
tree ARG_UNUSED (args),
}
}
 
-  auto_vec versions= get_clone_attr_versions (args, NULL);
-
-  if (versions.length () == 1)
-   {
+  int num_defaults = 0;
+  auto_vec versions= get_clone_attr_versions (args,
+ &num_defaults,
+ DECL_SOURCE_LOCATION (*node),
+ false);
+
+  for (auto v : versions)
+   if (targetm.reject_function_clone_version
+ (v, DECL_SOURCE_LOCATION (*node)))
  warning (OPT_Wattributes,
-  "single % attribute is ignored");
+  "invalid % version %qB ignored",
+  &v);
+
+  /* Lone target_clones version is always ignored for target attr 
semantics.
+Only ignore under target_version semantics if it is a default
+version.  */
+  if (versions.length () == 1 && (TARGET_HAS_FMV_TARGET_ATTRIBUTE
+ || num_defaults == 1))
+   {
+ if (TARGET_HAS_FMV_

[gcc(refs/users/alfierichards/heads/fmv_c)] Support mixing of target_clones and target_version.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:fd372533f3130f3dd554d5afb05fed9bbe61c8f1

commit fd372533f3130f3dd554d5afb05fed9bbe61c8f1
Author: Alfie Richards 
Date:   Mon Mar 24 16:02:07 2025 +

Support mixing of target_clones and target_version.

This patch adds support for the combination of target_clones and
target_version in the definition of a versioned function.

This patch changes is_function_default_version to consider a function
declaration annotated with target_clones containing default to be a
default version.

This takes advantage of refactoring done in previous patches changing
how target_clones are expanded and how conflicting decls are handled.

gcc/ChangeLog:

* attribs.cc (is_function_default_version): Update to handle
target_clones.
* cgraph.h (FOR_EACH_FUNCTION_REMOVABLE): New macro.
* multiple_target.cc (expand_target_clones): Update logic to delete
empty target_clones and modify diagnostic.
(ipa_target_clone): Update to use
FOR_EACH_FUNCTION_REMOVABLE.

gcc/c-family/ChangeLog:

* c-attribs.cc: Add support for target_version and target_clone 
mixing.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-and-mvc1.C: New test.
* g++.target/aarch64/mv-and-mvc2.C: New test.
* g++.target/aarch64/mv-and-mvc3.C: New test.
* g++.target/aarch64/mv-and-mvc4.C: New test.

Diff:
---
 gcc/attribs.cc | 10 ++-
 gcc/c-family/c-attribs.cc  |  9 +-
 gcc/cgraph.h   |  7 +
 gcc/multiple_target.cc | 24 ---
 gcc/testsuite/g++.target/aarch64/mv-and-mvc1.C | 38 
 gcc/testsuite/g++.target/aarch64/mv-and-mvc2.C | 29 ++
 gcc/testsuite/g++.target/aarch64/mv-and-mvc3.C | 41 ++
 gcc/testsuite/g++.target/aarch64/mv-and-mvc4.C | 38 
 8 files changed, 183 insertions(+), 13 deletions(-)

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index 04a9e743dbe0..57dd01531a3b 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -1247,7 +1247,8 @@ make_dispatcher_decl (const tree decl)
With the target attribute semantics, returns true if the function is marked
as default with the target version.
With the target_version attribute semantics, returns true if the function
-   is either not annotated, or annotated as default.  */
+   is either not annotated, annotated as default, or is a target_clone
+   containing the default declaration.  */
 
 bool
 is_function_default_version (const tree decl)
@@ -1264,6 +1265,13 @@ is_function_default_version (const tree decl)
 }
   else
 {
+  if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (decl)))
+   {
+ int num_defaults = 0;
+ get_clone_versions (decl, &num_defaults);
+ return num_defaults > 0;
+   }
+
   attr = lookup_attribute ("target_version", DECL_ATTRIBUTES (decl));
   if (!attr)
return true;
diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index b5287f0da06d..a4e657d9ffd7 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -249,13 +249,6 @@ static const struct attribute_spec::exclusions 
attr_target_clones_exclusions[] =
   ATTR_EXCL ("always_inline", true, true, true),
   ATTR_EXCL ("target", TARGET_HAS_FMV_TARGET_ATTRIBUTE,
 TARGET_HAS_FMV_TARGET_ATTRIBUTE, TARGET_HAS_FMV_TARGET_ATTRIBUTE),
-  ATTR_EXCL ("target_version", true, true, true),
-  ATTR_EXCL (NULL, false, false, false),
-};
-
-static const struct attribute_spec::exclusions 
attr_target_version_exclusions[] =
-{
-  ATTR_EXCL ("target_clones", true, true, true),
   ATTR_EXCL (NULL, false, false, false),
 };
 
@@ -543,7 +536,7 @@ const struct attribute_spec c_common_gnu_attributes[] =
  attr_target_exclusions },
   { "target_version", 1, 1, true, false, false, false,
  handle_target_version_attribute,
- attr_target_version_exclusions },
+ NULL },
   { "target_clones",  1, -1, true, false, false, false,
  handle_target_clones_attribute,
  attr_target_clones_exclusions },
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 8dcc9315a51a..5a8ccb8042b0 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -3090,6 +3090,13 @@ symbol_table::next_function_with_gimple_body 
(cgraph_node *node)
for ((node) = symtab->first_function (); (node); \
(node) = symtab->next_function ((node)))
 
+/* Walk all functions but precompute so a node can be deleted if needed.  */
+#define FOR_EACH_FUNCTION_REMOVABLE(node) \
+   cgraph_node *next; \
+   for ((node) = symtab->first_function (), \
+   next = (node) ? symtab->next_

[gcc(refs/users/alfierichards/heads/fmv_c)] Add clone_identifier function.

2025-04-07 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:93a83a8e51a92699948ba322c22d071e89d0eedf

commit 93a83a8e51a92699948ba322c22d071e89d0eedf
Author: Alfie Richards 
Date:   Fri Jan 31 11:47:57 2025 +

Add clone_identifier function.

This is similar to clone_function_name and its siblings but takes an
identifier tree node rather than a function declaration.

This is to be used in conjunction with the identifier node stored in
cgraph_function_version_info::assembler_name to mangle FMV functions in
later patches.

gcc/ChangeLog:

* cgraph.h (clone_identifier): New function.
* cgraphclones.cc (clone_identifier): New function.
(clone_function_name): Refactored to use clone_identifier.

Diff:
---
 gcc/cgraph.h|  1 +
 gcc/cgraphclones.cc | 16 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 91e5de30f98c..8dcc9315a51a 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -2629,6 +2629,7 @@ tree clone_function_name (const char *name, const char 
*suffix,
 tree clone_function_name (tree decl, const char *suffix,
  unsigned long number);
 tree clone_function_name (tree decl, const char *suffix);
+tree clone_identifier (tree decl, const char *suffix);
 
 void tree_function_versioning (tree, tree, vec *,
   ipa_param_adjustments *,
diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc
index 5332a4333173..6b650849a63c 100644
--- a/gcc/cgraphclones.cc
+++ b/gcc/cgraphclones.cc
@@ -557,6 +557,14 @@ clone_function_name (tree decl, const char *suffix)
   /* For consistency this needs to behave the same way as
  ASM_FORMAT_PRIVATE_NAME does, but without the final number
  suffix.  */
+  return clone_identifier (identifier, suffix);
+}
+
+/* Return a new clone of ID ending with the string SUFFIX.  */
+
+tree
+clone_identifier (tree id, const char *suffix)
+{
   char *separator = XALLOCAVEC (char, 2);
   separator[0] = symbol_table::symbol_suffix_separator ();
   separator[1] = 0;
@@ -565,15 +573,11 @@ clone_function_name (tree decl, const char *suffix)
 #else
   const char *prefix = "";
 #endif
-  char *result = ACONCAT ((prefix,
-  IDENTIFIER_POINTER (identifier),
-  separator,
-  suffix,
-  (char*)0));
+  char *result = ACONCAT (
+(prefix, IDENTIFIER_POINTER (id), separator, suffix, (char *) 0));
   return get_identifier (result);
 }
 
-
 /* Create callgraph node clone with new declaration.  The actual body will be
copied later at compilation stage.  The name of the new clone will be
constructed from the name of the original node, SUFFIX and NUM_SUFFIX.


[gcc] Created branch 'meissner/heads/work200-dmf' in namespace 'refs/users'

2025-04-07 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work200-dmf' was created in namespace 'refs/users' 
pointing to:

 150f967ed97a... Add ChangeLog.meissner and REVISION.


[gcc] Created branch 'meissner/heads/work200-vpair' in namespace 'refs/users'

2025-04-07 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work200-vpair' was created in namespace 'refs/users' 
pointing to:

 150f967ed97a... Add ChangeLog.meissner and REVISION.


[gcc r15-9245] LoongArch: Add LoongArch architecture detection to __float128 support in libgfortran and libquadmath

2025-04-07 Thread LuluCheng via Gcc-cvs
https://gcc.gnu.org/g:1534f0099c98ea14c08a401302b05edf2231f411

commit r15-9245-g1534f0099c98ea14c08a401302b05edf2231f411
Author: Lulu Cheng 
Date:   Mon Apr 7 10:00:27 2025 +0800

LoongArch: Add LoongArch architecture detection to __float128 support in 
libgfortran and libquadmath [PR119408].

In GCC14, LoongArch added __float128 as an alias for _Float128.
In commit r15-8962, support was added for q/Q suffixes for 128-bit floating
point numbers.  This will cause the compiler to automatically link libquadmath
when compiling Fortran programs.  But on LoongArch `long double` is
IEEE quad, so there is no need to implement libquadmath.
This causes link failure.

PR target/119408

libgfortran/ChangeLog:

* acinclude.m4: When checking for __float128 support, determine
whether the current architecture is LoongArch.  If so, return false.
* configure: Regenerate.

libquadmath/ChangeLog:

* configure.ac: When checking for __float128 support, determine
whether the current architecture is LoongArch.  If so, return false.
* configure: Regenerate.

Signed-off-by: Xi Ruoyao 
Signed-off-by: Jakub Jelinek 

Diff:
---
 libgfortran/acinclude.m4 | 4 
 libgfortran/configure| 8 
 libquadmath/configure| 8 
 libquadmath/configure.ac | 4 
 4 files changed, 24 insertions(+)

diff --git a/libgfortran/acinclude.m4 b/libgfortran/acinclude.m4
index a73207e54656..23fd621e5188 100644
--- a/libgfortran/acinclude.m4
+++ b/libgfortran/acinclude.m4
@@ -274,6 +274,10 @@ AC_DEFUN([LIBGFOR_CHECK_FLOAT128], [
   AC_CACHE_CHECK([whether we have a usable _Float128 type],
  libgfor_cv_have_float128, [
GCC_TRY_COMPILE_OR_LINK([
+#ifdef __loongarch__
+#error On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 _Float128 foo (_Float128 x)
 {
  _Complex _Float128 z1, z2;
diff --git a/libgfortran/configure b/libgfortran/configure
index 11a1bc5f0708..9898a94a372a 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -30283,6 +30283,10 @@ else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
+#ifdef __loongarch__
+#error On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 _Float128 foo (_Float128 x)
 {
  _Complex _Float128 z1, z2;
@@ -30336,6 +30340,10 @@ fi
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
+#ifdef __loongarch__
+#error On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 _Float128 foo (_Float128 x)
 {
  _Complex _Float128 z1, z2;
diff --git a/libquadmath/configure b/libquadmath/configure
index 49d70809218c..f82dd3d0d6d4 100755
--- a/libquadmath/configure
+++ b/libquadmath/configure
@@ -12843,6 +12843,10 @@ else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
+#ifdef __loongarch__
+#error  On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
 typedef _Complex float __attribute__((mode(TC))) __complex128;
 #else
@@ -12894,6 +12898,10 @@ fi
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
+#ifdef __loongarch__
+#error  On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
 typedef _Complex float __attribute__((mode(TC))) __complex128;
 #else
diff --git a/libquadmath/configure.ac b/libquadmath/configure.ac
index 349be2607c64..c64a84892191 100644
--- a/libquadmath/configure.ac
+++ b/libquadmath/configure.ac
@@ -233,6 +233,10 @@ AM_CONDITIONAL(LIBQUAD_USE_SYMVER_SUN, [test 
"x$quadmath_use_symver" = xsun])
 
 AC_CACHE_CHECK([whether __float128 is supported], [libquad_cv_have_float128],
   [GCC_TRY_COMPILE_OR_LINK([
+#ifdef __loongarch__
+#error  On LoongArch we should use long double instead; __float128 is only 
for porting existing code easier.
+#endif
+
 #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
 typedef _Complex float __attribute__((mode(TC))) __complex128;
 #else


[gcc r15-9262] c++: constinit and value-initialization [PR119652]

2025-04-07 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:c7dc9b6f889fa8f9e4ef060c3af107eaf54265c5

commit r15-9262-gc7dc9b6f889fa8f9e4ef060c3af107eaf54265c5
Author: Jason Merrill 
Date:   Mon Apr 7 11:49:19 2025 -0400

c++: constinit and value-initialization [PR119652]

Value-initialization built an AGGR_INIT_EXPR to set AGGR_INIT_ZERO_FIRST on.
Passing that AGGR_INIT_EXPR to maybe_constant_value returned a TARGET_EXPR,
which potential_constant_expression_1 mistook for a temporary.

We shouldn't add a TARGET_EXPR to the AGGR_INIT_EXPR in this case, just like
we already avoid adding it to CONSTRUCTOR or CALL_EXPR.

PR c++/119652

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_outermost_constant_expr): Also don't add a
TARGET_EXPR around AGGR_INIT_EXPR.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/constinit20.C: New test.

Diff:
---
 gcc/cp/constexpr.cc  |  3 ++-
 gcc/testsuite/g++.dg/cpp2a/constinit20.C | 18 ++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 37ea65cb6550..497f64f3ceaa 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -9316,7 +9316,8 @@ cxx_eval_outermost_constant_expr (tree t, bool 
allow_non_constant,
   if (TREE_CODE (t) == TARGET_EXPR
  && TARGET_EXPR_INITIAL (t) == r)
return t;
-  else if (TREE_CODE (t) == CONSTRUCTOR || TREE_CODE (t) == CALL_EXPR)
+  else if (TREE_CODE (t) == CONSTRUCTOR || TREE_CODE (t) == CALL_EXPR
+  || TREE_CODE (t) == AGGR_INIT_EXPR)
/* Don't add a TARGET_EXPR if our argument didn't have one.  */;
   else if (TREE_CODE (t) == TARGET_EXPR && TARGET_EXPR_CLEANUP (t))
r = get_target_expr (r);
diff --git a/gcc/testsuite/g++.dg/cpp2a/constinit20.C 
b/gcc/testsuite/g++.dg/cpp2a/constinit20.C
new file mode 100644
index ..9b043917dc36
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/constinit20.C
@@ -0,0 +1,18 @@
+// PR c++/119652
+// { dg-do compile { target c++20 } }
+
+struct __shared_count {
+  constexpr __shared_count() {}
+  ~__shared_count();
+  int _M_pi = 0;
+};
+struct shared_ptr {
+  __shared_count _M_refcount;
+};
+struct A {
+  A() = default;
+  shared_ptr m;
+};
+constinit A a;
+constinit A b {};
+constinit A c = {};


[gcc r14-11534] c++: __FUNCTION__ in lambda return type [PR118629]

2025-04-07 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:e2a178a5110db3b4c25773d386b047b27679bfe5

commit r14-11534-ge2a178a5110db3b4c25773d386b047b27679bfe5
Author: Jason Merrill 
Date:   Fri Apr 4 17:34:08 2025 -0400

c++: __FUNCTION__ in lambda return type [PR118629]

In this testcase, the use of __FUNCTION__ is within a function parameter
scope, the lambda's.  And P1787 changed __func__ to live in the parameter
scope.  But [basic.scope.pdecl] says that the point of declaration of
__func__ is immediately before {, so in the trailing return type it isn't in
scope yet, so this __FUNCTION__ should refer to foo().

Looking first for a block scope, then a function parameter scope, gives us
the right result.

PR c++/118629

gcc/cp/ChangeLog:

* name-lookup.cc (pushdecl_outermost_localscope): Look for an
sk_block.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/lambda/lambda-__func__3.C: New test.

(cherry picked from commit 7d561820525fd3b9d8f3876333c0584d75e7c053)

Diff:
---
 gcc/cp/name-lookup.cc| 8 +---
 gcc/testsuite/g++.dg/cpp0x/lambda/lambda-__func__3.C | 6 ++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc
index 2e203e5ca0c8..53528cafa8f4 100644
--- a/gcc/cp/name-lookup.cc
+++ b/gcc/cp/name-lookup.cc
@@ -5125,9 +5125,11 @@ pushdecl_outermost_localscope (tree x)
   cp_binding_level *b = NULL;
   auto_cond_timevar tv (TV_NAME_LOOKUP);
 
-  /* Find the scope just inside the function parms.  */
-  for (cp_binding_level *n = current_binding_level;
-   n->kind != sk_function_parms; n = b->level_chain)
+  /* Find the block scope just inside the function parms.  */
+  cp_binding_level *n = current_binding_level;
+  while (n && n->kind != sk_block)
+n = n->level_chain;
+  for (; n && n->kind != sk_function_parms; n = b->level_chain)
 b = n;
 
   return b ? do_pushdecl_with_scope (x, b) : error_mark_node;
diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-__func__3.C 
b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-__func__3.C
new file mode 100644
index ..50ad6e55c1b1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-__func__3.C
@@ -0,0 +1,6 @@
+// PR c++/118629
+// { dg-do compile { target c++11 } }
+
+void foo() {
+  []() -> decltype(+__FUNCTION__) { return nullptr; };
+}


[gcc r15-9244] Ada: Fix wrong 'Access to aliased constrained array of controlled type

2025-04-07 Thread Eric Botcazou via Gcc-cvs
https://gcc.gnu.org/g:f085dbf97ed4445830127c955909ff2b887ded69

commit r15-9244-gf085dbf97ed4445830127c955909ff2b887ded69
Author: Eric Botcazou 
Date:   Mon Apr 7 10:33:52 2025 +0200

Ada: Fix wrong 'Access to aliased constrained array of controlled type

For technical reasons, the recently reimplemented finalization machinery
for controlled types requires arrays of controlled types to be allocated
with their bounds, including in the case where their nominal subtype is
constrained.  However, in this case, the type of 'Access for the arrays
is pointer-to-constrained-array and, therefore, its value must designate
the array itself and not the bounds.

gcc/ada/
* gcc-interface/utils.cc (convert) : Use fold_convert
to convert between thin pointers.  If the source is a thin pointer
with zero offset from the base and the target is a pointer to its
array, displace the pointer after converting it.
* gcc-interface/utils2.cc (build_unary_op) : Use
fold_convert to convert the address before displacing it.

Diff:
---
 gcc/ada/gcc-interface/utils.cc  | 17 -
 gcc/ada/gcc-interface/utils2.cc | 11 ++-
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc
index 1448716acc5a..9212827aecfe 100644
--- a/gcc/ada/gcc-interface/utils.cc
+++ b/gcc/ada/gcc-interface/utils.cc
@@ -5259,7 +5259,7 @@ convert (tree type, tree expr)
  : size_zero_node;
  tree byte_diff = size_diffop (type_pos, etype_pos);
 
- expr = build1 (NOP_EXPR, type, expr);
+ expr = fold_convert (type, expr);
  if (integer_zerop (byte_diff))
return expr;
 
@@ -5267,6 +5267,21 @@ convert (tree type, tree expr)
  fold_convert (sizetype, byte_diff));
}
 
+  /* If converting from a thin pointer with zero offset from the base to
+a pointer to the array, add the offset of the array field.  */
+  if (TYPE_IS_THIN_POINTER_P (etype)
+ && !TYPE_UNCONSTRAINED_ARRAY (TREE_TYPE (etype)))
+   {
+ tree arr_field = DECL_CHAIN (TYPE_FIELDS (TREE_TYPE (etype)));
+
+ if (TREE_TYPE (type) == TREE_TYPE (arr_field))
+   {
+ expr = fold_convert (type, expr);
+ return build_binary_op (POINTER_PLUS_EXPR, type, expr,
+ byte_position (arr_field));
+   }
+   }
+
   /* If converting fat pointer to normal or thin pointer, get the pointer
 to the array and then convert it.  */
   if (TYPE_IS_FAT_POINTER_P (etype))
diff --git a/gcc/ada/gcc-interface/utils2.cc b/gcc/ada/gcc-interface/utils2.cc
index 99e592781f5e..58418ea7236b 100644
--- a/gcc/ada/gcc-interface/utils2.cc
+++ b/gcc/ada/gcc-interface/utils2.cc
@@ -1628,11 +1628,12 @@ build_unary_op (enum tree_code op_code, tree 
result_type, tree operand)
= size_binop (PLUS_EXPR, offset,
  size_int (bits_to_bytes_round_down (bitpos)));
 
- /* Take the address of INNER, convert it to a pointer to our type
-and add the offset.  */
- inner = build_unary_op (ADDR_EXPR,
- build_pointer_type (TREE_TYPE (operand)),
- inner);
+ /* Take the address of INNER, formally convert it to a pointer
+to the operand type, and finally add the offset.  */
+ inner = build_unary_op (ADDR_EXPR, NULL_TREE, inner);
+ inner
+   = fold_convert (build_pointer_type (TREE_TYPE (operand)),
+   inner);
  result = build_binary_op (POINTER_PLUS_EXPR, TREE_TYPE (inner),
inner, offset);
  break;


[gcc r15-9257] cobol: Address some iconv issues.

2025-04-07 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:6f6c2694dc7c07bca5086c77ef29f3793e0c61c2

commit r15-9257-g6f6c2694dc7c07bca5086c77ef29f3793e0c61c2
Author: Iain Sandoe 
Date:   Sun Apr 6 14:56:20 2025 +0100

cobol: Address some iconv issues.

Darwin/macOS installed libiconv does not accept // trailers on
conversion codes; this causes the init_iconv to fail - and then
that SEGVs later.

Remove the trailing // as it is not needed elsewhere.
Also print a warning if we fail to init the conversion.

gcc/cobol/ChangeLog:

* symbols.cc : Remove trailing // on standard_internal.
(cbl_field_t::internalize): Print a warning if we fail to
initialise iconv.

Signed-off-by: Iain Sandoe 

Diff:
---
 gcc/cobol/symbols.cc | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc
index 50431251a0f1..4067459858c3 100644
--- a/gcc/cobol/symbols.cc
+++ b/gcc/cobol/symbols.cc
@@ -3567,7 +3567,7 @@ cbl_field_t::is_ascii() const {
  * compilation, if it moves off the default, it adjusts only once, and
  * never reverts.
  */
-static const char standard_internal[] = "CP1252//";
+static const char standard_internal[] = "CP1252";
 extern os_locale_t os_locale;
 
 static const char *
@@ -3595,6 +3595,10 @@ cbl_field_t::internalize() {
   static  iconv_t cd = iconv_open(tocode, fromcode);
   static const size_t noconv = size_t(-1);
 
+  if (cd == (iconv_t)-1) {
+yywarn("failed iconv_open tocode = '%s' fromcode = %s", tocode, fromcode);
+  }
+
   // Sat Mar 16 11:45:08 2024: require temporary environment for testing
   if( getenv( "INTERNALIZE_NO") ) return data.initial;


[gcc] Created branch 'meissner/heads/work200-bugs' in namespace 'refs/users'

2025-04-07 Thread Michael Meissner via Gcc-cvs
The branch 'meissner/heads/work200-bugs' was created in namespace 'refs/users' 
pointing to:

 150f967ed97a... Add ChangeLog.meissner and REVISION.


[gcc r15-9246] tailc: Extend the IPA-VRP workaround [PR119614]

2025-04-07 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:e7c3a7ccd6209c1a906bdf59207f0fa4258b692b

commit r15-9246-ge7c3a7ccd6209c1a906bdf59207f0fa4258b692b
Author: Jakub Jelinek 
Date:   Mon Apr 7 11:57:36 2025 +0200

tailc: Extend the IPA-VRP workaround [PR119614]

The IPA-VRP workaround in the tailc/musttail passes was just comparing
the singleton constant from a tail call candidate return with the ret_val.
This unfortunately doesn't work in the following testcase, where we have
   [local count: 152205050]:
  baz (); [must tail call]
  goto ; [100.00%]

   [local count: 762356696]:
  _8 = foo ();

   [local count: 1073741824]:
  # _3 = PHI <0B(4), _8(6)>
  return _3;
and in the unreduced testcase even more PHIs before we reach the return
stmt.

Normally when the call has lhs, whenever we follow a (non-EH) successor
edge, it calls propagate_through_phis and that walks the PHIs in the
destination bb of the edge and when it sees a PHI whose argument matches
that of the currently tracked value (ass_var), it updates ass_var to
PHI result of that PHI.  I think it is theoretically dangerous that it
picks the first one; perhaps there could be multiple PHIs, so perhaps it would
be safer to walk backwards from the return value up to the call.

Anyway, this PR is about the IPA-VRP workaround, there ass_var is NULL
because the potential tail call has no lhs, but ret_var is not TREE_CONSTANT
but SSA_NAME with PHI as SSA_NAME_DEF_STMT.  The following patch handles
it by pushing the edges we've walked through when ass_var is NULL into a
vector and if ret_var is SSA_NAME set to PHI result, it attempts to walk
back from the ret_var through arguments of PHIs corresponding to the
edges we've walked back until we reach a constant and compare that constant
against the singleton value as well.

2025-04-07  Jakub Jelinek  

PR tree-optimization/119614
* tree-tailcall.cc (find_tail_calls): Remember edges which have been
walked through if !ass_var.  Perform IPA-VRP workaround even when
ret_var is not TREE_CONSTANT, in that case check in a loop if it is
a PHI result and in that case look at the PHI argument from
corresponding edge in the edge vector.

* g++.dg/opt/pr119613.C: Change { c || c++11 } in obviously C++ only
test to just c++11.
* g++.dg/opt/pr119614.C: New test.

Diff:
---
 gcc/testsuite/g++.dg/opt/pr119613.C |  2 +-
 gcc/testsuite/g++.dg/opt/pr119614.C | 30 +++
 gcc/tree-tailcall.cc| 47 -
 3 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/g++.dg/opt/pr119613.C 
b/gcc/testsuite/g++.dg/opt/pr119613.C
index 432a30cdcdb0..2ced2e8fa2a0 100644
--- a/gcc/testsuite/g++.dg/opt/pr119613.C
+++ b/gcc/testsuite/g++.dg/opt/pr119613.C
@@ -1,5 +1,5 @@
 // PR middle-end/119613
-// { dg-do compile { target { musttail && { c || c++11 } } } }
+// { dg-do compile { target { musttail && c++11 } } }
 // { dg-options "-O0" }
 
 struct S { S () {} };
diff --git a/gcc/testsuite/g++.dg/opt/pr119614.C 
b/gcc/testsuite/g++.dg/opt/pr119614.C
new file mode 100644
index ..cb73fc3ec09d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr119614.C
@@ -0,0 +1,30 @@
+// PR tree-optimization/119614
+// { dg-do compile { target musttail } }
+// { dg-options "-O2" }
+
+struct S {} b;
+char *foo ();
+int e, g;
+void bar ();
+void corge (S);
+
+[[gnu::noinline]] char *
+baz ()
+{
+  bar ();
+  return 0;
+}
+
+const char *
+qux ()
+{
+  if (e)
+{
+  S a = b;
+  corge (a);
+  if (g)
+return 0;
+  [[gnu::musttail]] return baz ();
+}
+  return foo ();
+}
diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc
index c8740f9353e2..f51bb970e329 100644
--- a/gcc/tree-tailcall.cc
+++ b/gcc/tree-tailcall.cc
@@ -920,6 +920,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail,
   auto_bitmap to_move_defs;
   auto_vec to_move_stmts;
   bool is_noreturn = gimple_call_noreturn_p (call);
+  auto_vec edges;
 
   abb = bb;
   agsi = gsi;
@@ -933,6 +934,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail,
{
  edge e = single_non_eh_succ_edge (abb);
  ass_var = propagate_through_phis (ass_var, e);
+ if (!ass_var)
+   edges.safe_push (e);
  abb = e->dest;
  agsi = gsi_start_bb (abb);
}
@@ -1040,9 +1043,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail,
   /* If IPA-VRP proves called function always returns a singleton range,
 the return value is replaced by the only value in that range.
 For tail call purposes, pretend such replacement didn't happen.  */
-  if (ass_var == NULL_TREE
- && !tail_recursion
- && TREE_CONSTANT (ret_var))
+ 

[gcc r15-9248] AVRrc: Tweak __[u]mulhisi3.

2025-04-07 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:29cc92348e0d32b7d301eae42aaebd716736f037

commit r15-9248-g29cc92348e0d32b7d301eae42aaebd716736f037
Author: Georg-Johann Lay 
Date:   Mon Apr 7 12:39:41 2025 +0200

AVRrc: Tweak __[u]mulhisi3.

When MUL is not available, the __umulhisi3 and __mulhisi3
functions can use __mulsi3_helper.  This improves code size,
stack footprint and runtime on AVRrc.

libgcc/
* config/avr/lib1funcs.S (__mulhisi3, __umulhisi3): Use
__mulsi3_helper for better performance on AVRrc.

Diff:
---
 libgcc/config/avr/lib1funcs.S | 71 ++-
 1 file changed, 22 insertions(+), 49 deletions(-)

diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S
index 52ce051e00fe..dfe99b1ea06f 100644
--- a/libgcc/config/avr/lib1funcs.S
+++ b/libgcc/config/avr/lib1funcs.S
@@ -395,29 +395,23 @@ ENDF  __mulhi3
 
 #if defined (L_umulhisi3)
 DEFUN __umulhisi3
-#ifndef __AVR_TINY__
+#ifdef __AVR_TINY__
+;; Save callee saved regs.
+pushB0
+pushB1
+#endif /* AVR_TINY */
 wmovB0, 24
 ;; Zero-extend B
 clr B2
 clr B3
 ;; Zero-extend A
 wmovA2, B2
-XJMP__mulsi3
+#ifdef __AVR_TINY__
+;; Clear hi16 of the result so we can use __mulsi3_helper.
+wmovCC2, B2
+XJMP__mulsi3_helper
 #else
-;; Push zero-extended R24
-push__zero_reg__
-push__zero_reg__
-pushr25
-pushr24
-;; Zero-extend R22
-clr R24
-clr R25
-XCALL   __mulsi3
-pop __tmp_reg__
-pop __tmp_reg__
-pop __tmp_reg__
-pop __tmp_reg__
-ret
+XJMP__mulsi3
 #endif /* AVR_TINY? */
 ENDF __umulhisi3
 #endif /* L_umulhisi3 */
@@ -425,54 +419,33 @@ ENDF __umulhisi3
 #if defined (L_mulhisi3)
 DEFUN __mulhisi3
 #ifdef __AVR_TINY__
-;; Push sign-extended R24
-mov __tmp_reg__, r25
-lsl __tmp_reg__
-sbc __tmp_reg__, __tmp_reg__
-push__tmp_reg__
-push__tmp_reg__
-pushr25
-pushr24
-;;  Sign-extend R22
-mov r24, r23
-lsl r24
-sbc r24, r24
-sbc r25, r25
-XCALL   __mulsi3
-pop __tmp_reg__
-pop __tmp_reg__
-pop __tmp_reg__
-pop __tmp_reg__
-ret
-#else
+;; Save callee saved regs.
+pushB0
+pushB1
+#endif /* AVR_TINY */
 wmovB0, 24
 ;; Sign-extend B
 lsl r25
 sbc B2, B2
 mov B3, B2
-#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
-;; Sign-extend A
-clr A2
-sbrcA1, 7
-com A2
-mov A3, A2
-XJMP__mulsi3
-#else /*  no __AVR_ERRATA_SKIP_JMP_CALL__ */
 ;; Zero-extend A and __mulsi3 will run at least twice as fast
 ;; compared to a sign-extended A.
 clr A2
 clr A3
+;; Clear hi16 of the result so we can use __mulsi3_helper.
+wmovCC2, A2
 sbrsA1, 7
-XJMP __mulsi3
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+rjmp 1f
+#else
+XJMP__mulsi3_helper
+#endif /* ERRATA_SKIP */
 ;; If  A < 0  then perform the  B * 0xffff0000 before the
 ;; very multiplication by initializing the high part of the
 ;; result CC with -B.
-wmovCC2, A2
 sub CC2, B0
 sbc CC3, B1
-XJMP __mulsi3_helper
-#endif /*  __AVR_ERRATA_SKIP_JMP_CALL__ */
-#endif /* AVR_TINY? */
+1:  XJMP__mulsi3_helper
 ENDF __mulhisi3
 #endif /* L_mulhisi3 */