[gcc r15-7804] Fortran: Prevent ICE when getting caf-token from abstract type [PR77872]

2025-03-04 Thread Andre Vehreschild via Gcc-cvs
https://gcc.gnu.org/g:5bd664838398980f1c8af60a946947ff83744fcc

commit r15-7804-g5bd664838398980f1c8af60a946947ff83744fcc
Author: Andre Vehreschild 
Date:   Mon Mar 3 14:42:28 2025 +0100

Fortran: Prevent ICE when getting caf-token from abstract type [PR77872]

PR fortran/77872

gcc/fortran/ChangeLog:

* trans-expr.cc (gfc_get_tree_for_caf_expr): Pick up token from
decl when it is present there for class types.

gcc/testsuite/ChangeLog:

* gfortran.dg/coarray/class_1.f90: New test.

Diff:
---
 gcc/fortran/trans-expr.cc |  5 +
 gcc/testsuite/gfortran.dg/coarray/class_1.f90 | 16 
 2 files changed, 21 insertions(+)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 7c0b17428cdd..0d790b63f956 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -2394,6 +2394,11 @@ gfc_get_tree_for_caf_expr (gfc_expr *expr)
  if (CLASS_DATA (expr->symtree->n.sym)->attr.codimension)
return caf_decl;
}
+  else if (DECL_P (caf_decl) && DECL_LANG_SPECIFIC (caf_decl)
+  && GFC_DECL_TOKEN (caf_decl)
+  && CLASS_DATA (expr->symtree->n.sym)->attr.codimension)
+   return caf_decl;
+
   for (ref = expr->ref; ref; ref = ref->next)
{
  if (ref->type == REF_COMPONENT
diff --git a/gcc/testsuite/gfortran.dg/coarray/class_1.f90 
b/gcc/testsuite/gfortran.dg/coarray/class_1.f90
new file mode 100644
index ..fa70b1d61629
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray/class_1.f90
@@ -0,0 +1,16 @@
+!{ dg-do compile }
+!
+! Compiling the call x%f() ICEd.  Check it's fixed.
+! Contributed by Gerhard Steinmetz  
+
+module pr77872_abs
+   type, abstract :: t
+   contains
+  procedure(s), pass, deferred :: f
+   end type
+contains
+   subroutine s(x)
+  class(t) :: x[*]
+  call x%f()
+   end
+end module pr77872_abs


[gcc r15-7803] Fortran: Reduce code complexity [PR77872]

2025-03-04 Thread Andre Vehreschild via Gcc-cvs
https://gcc.gnu.org/g:ef605e106c6075bfe2a5625add7185a9a3f722b1

commit r15-7803-gef605e106c6075bfe2a5625add7185a9a3f722b1
Author: Andre Vehreschild 
Date:   Mon Mar 3 10:41:05 2025 +0100

Fortran: Reduce code complexity [PR77872]

PR fortran/77872

gcc/fortran/ChangeLog:

* trans-expr.cc (gfc_conv_procedure_call): Use attr instead of
doing type check and branching for BT_CLASS.

Diff:
---
 gcc/fortran/trans-expr.cc | 14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index e619013f261e..7c0b17428cdd 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -8216,23 +8216,15 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
   /* For descriptorless coarrays and assumed-shape coarray dummies, we
 pass the token and the offset as additional arguments.  */
   if (fsym && e == NULL && flag_coarray == GFC_FCOARRAY_LIB
- && ((fsym->ts.type != BT_CLASS && fsym->attr.codimension
-  && !fsym->attr.allocatable)
- || (fsym->ts.type == BT_CLASS
- && CLASS_DATA (fsym)->attr.codimension
- && !CLASS_DATA (fsym)->attr.allocatable)))
+ && attr->codimension && !attr->allocatable)
{
  /* Token and offset.  */
  vec_safe_push (stringargs, null_pointer_node);
  vec_safe_push (stringargs, build_int_cst (gfc_array_index_type, 0));
  gcc_assert (fsym->attr.optional);
}
-  else if (fsym && flag_coarray == GFC_FCOARRAY_LIB
-  && ((fsym->ts.type != BT_CLASS && fsym->attr.codimension
-   && !fsym->attr.allocatable)
-  || (fsym->ts.type == BT_CLASS
-  && CLASS_DATA (fsym)->attr.codimension
-  && !CLASS_DATA (fsym)->attr.allocatable)))
+  else if (fsym && flag_coarray == GFC_FCOARRAY_LIB && attr->codimension
+  && !attr->allocatable)
{
  tree caf_decl, caf_type, caf_desc = NULL_TREE;
  tree offset, tmp2;


[gcc r14-11375] ipa-sra: Avoid clashes with ipa-cp when pulling accesses across calls (PR 118243)

2025-03-04 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:455ea90d6e5ed2938fb7cc7008bf738dcbbc72d4

commit r14-11375-g455ea90d6e5ed2938fb7cc7008bf738dcbbc72d4
Author: Martin Jambor 
Date:   Tue Mar 4 14:53:41 2025 +0100

ipa-sra: Avoid clashes with ipa-cp when pulling accesses across calls (PR 
118243)

Among other things, IPA-SRA checks whether splitting out a bit of an
aggregate or something passed by reference would lead into a clash
with an already known IPA-CP constant in a way which would cause problems
later on.  Unfortunately the test is done only in
adjust_parameter_descriptions and is missing when accesses are
propagated from callees to callers, which leads to miscompilation
reported as PR 118243 (where the callee is a function created by
ipa-split).

The matter is then further complicated by the fact that we consider
complex numbers as scalars even though they can be modified piecemeal
(IPA-CP can detect and propagate the pieces separately too) which then
confuses the parameter manipulation machinery further.

This patch simply adds the missing check to avoid the IPA-SRA
transform in these cases too, which should be suitable for backporting
to all affected release branches.  It is a bit of a shame as in the PR
testcase we do propagate both components of the complex number in
question and the transformation phase could recover.  I have some
prototype patches in this direction but that is something for (a)
stage 1.

gcc/ChangeLog:

2025-02-10  Martin Jambor  

PR ipa/118243
* ipa-sra.cc (pull_accesses_from_callee): New parameters
caller_ipcp_ts and param_idx.  Check that scalar pulled accesses 
would
not clash with a known IPA-CP aggregate constant.
(param_splitting_across_edge): Pass IPA-CP transformation summary 
and
caller parameter index to pull_accesses_from_callee.

gcc/testsuite/ChangeLog:

2025-02-10  Martin Jambor  

PR ipa/118243
* g++.dg/ipa/pr118243.C: New test.

(cherry picked from commit 0bffcd469e68d68ba9c724f515651deff8494b82)

Diff:
---
 gcc/ipa-sra.cc  | 38 +--
 gcc/testsuite/g++.dg/ipa/pr118243.C | 40 +
 2 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index 6d6da4089251..25fbccd03480 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -3640,15 +3640,19 @@ enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, 
ACC_PROP_CERTAIN};
 
 /* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
(which belongs to CALLER) if they would not violate some constraint there.
-   If successful, return NULL, otherwise return the string reason for failure
-   (which can be written to the dump file).  DELTA_OFFSET is the known offset
-   of the actual argument withing the formal parameter (so of ARG_DESCS within
-   PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
-   known. In case of success, set *CHANGE_P to true if propagation actually
-   changed anything.  */
+   CALLER_IPCP_TS describes the caller, PARAM_IDX is the index of the parameter
+   described by PARAM_DESC.  If successful, return NULL, otherwise return the
+   string reason for failure (which can be written to the dump file).
+   DELTA_OFFSET is the known offset of the actual argument withing the formal
+   parameter (so of ARG_DESCS within PARAM_DESCS), ARG_SIZE is the size of the
+   actual argument or zero, if not known. In case of success, set *CHANGE_P to
+   true if propagation actually changed anything.  */
 
 static const char *
-pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
+pull_accesses_from_callee (cgraph_node *caller,
+  ipcp_transformation *caller_ipcp_ts,
+  int param_idx,
+  isra_param_desc *param_desc,
   isra_param_desc *arg_desc,
   unsigned delta_offset, unsigned arg_size,
   bool *change_p)
@@ -3673,6 +3677,17 @@ pull_accesses_from_callee (cgraph_node *caller, 
isra_param_desc *param_desc,
continue;
 
   unsigned offset = argacc->unit_offset + delta_offset;
+
+  if (caller_ipcp_ts && !AGGREGATE_TYPE_P (argacc->type))
+   {
+ ipa_argagg_value_list avl (caller_ipcp_ts);
+ tree value = avl.get_value (param_idx, offset);
+ if (value && ((tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value)))
+/ BITS_PER_UNIT)
+   != argacc->unit_size))
+   return " propagated access would conflict with an IPA-CP constant";
+   }
+
   /* Given that accesses are initially stored according to increasing
 offset and decreasing size in case of equal offsets, the following
 searches could 

[gcc r14-11377] testsuite: Add tests for already fixed PR [PR119071]

2025-03-04 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:e79fc5a7bc9958c95fd09928188f5aa6c051d4c1

commit r14-11377-ge79fc5a7bc9958c95fd09928188f5aa6c051d4c1
Author: Jakub Jelinek 
Date:   Tue Mar 4 09:52:22 2025 +0100

testsuite: Add tests for already fixed PR [PR119071]

Uros' r15-7793 fixed this PR as well, I'm just committing tests
from the PR so that it can be closed.

2025-03-04  Jakub Jelinek  

PR rtl-optimization/119071
* gcc.dg/pr119071.c: New test.
* gcc.c-torture/execute/pr119071.c: New test.

(cherry picked from commit ccf9db9a6fa4b5bc7aad5e9603e2ac71984142a0)

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr119071.c | 15 +
 gcc/testsuite/gcc.dg/pr119071.c| 45 ++
 2 files changed, 60 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr119071.c 
b/gcc/testsuite/gcc.c-torture/execute/pr119071.c
new file mode 100644
index ..91f29cce3d55
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr119071.c
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/119071 */
+
+int a, b;
+
+int
+main ()
+{
+  int c = 0;
+  if (a + 2)
+c = 1;
+  int d = (1 + c - 2 + c == 1) - 1;
+  b = ((d + 1) << d) + d;
+  if (b != 1)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/pr119071.c b/gcc/testsuite/gcc.dg/pr119071.c
new file mode 100644
index ..ade1d288d2ae
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr119071.c
@@ -0,0 +1,45 @@
+/* PR rtl-optimization/119071 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fgimple" } */
+
+int a, b;
+
+int __GIMPLE (ssa,startwith("expand"))
+foo (void)
+{
+  int _1;
+  int _2;
+  int _3;
+  int _5;
+  _Bool _7;
+  int _8;
+  int _9;
+  _Bool _14;
+  int _15;
+  int _16;
+  _Bool _17;
+  int _18;
+
+  __BB(2):
+  _1 = a;
+  _17 = _1 != _Literal (int) -2;
+  _18 = (int) _17;
+  _2 = _18 + _Literal (int) -1;
+  _3 = _2 + _18;
+  _14 = _3 != 1;
+  _15 = (int) _14;
+  _16 = -_15;
+  _7 = _3 == 1;
+  _9 = (int) _7;
+  _5 = _9 << _16;
+  _8 = _5 - _15;
+  b = _8;
+  return _8;
+}
+
+int
+main ()
+{
+  if (foo () != 1)
+__builtin_abort ();
+}


[gcc r14-11376] combine: Discard REG_UNUSED note in i2 when register is also referenced in i3 [PR118739]

2025-03-04 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:b7b8307893ef0b6a2a0565d75af831cca4c9511a

commit r14-11376-gb7b8307893ef0b6a2a0565d75af831cca4c9511a
Author: Uros Bizjak 
Date:   Wed Feb 12 11:19:57 2025 +0100

combine: Discard REG_UNUSED note in i2 when register is also referenced in 
i3 [PR118739]

The combine pass is trying to combine:

Trying 16, 22, 21 -> 23:
   16: r104:QI=flags:CCNO>0
   22: {r120:QI=r104:QI^0x1;clobber flags:CC;}
  REG_UNUSED flags:CC
   21: r119:QI=flags:CCNO<=0
  REG_DEAD flags:CCNO
   23: {r110:QI=r119:QI|r120:QI;clobber flags:CC;}
  REG_DEAD r120:QI
  REG_DEAD r119:QI
  REG_UNUSED flags:CC

and creates the following two insn sequence:

modifying insn i222: r104:QI=flags:CCNO>0
  REG_DEAD flags:CC
deferring rescan insn with uid = 22.
modifying insn i323: r110:QI=flags:CCNO<=0
  REG_DEAD flags:CC
deferring rescan insn with uid = 23.

where the REG_DEAD note in i2 is not correct, because the flags
register is still referenced in i3.  In try_combine() megafunction,
we have this part:

--cut here--
/* Distribute all the LOG_LINKS and REG_NOTES from I1, I2, and I3.  */
if (i3notes)
  distribute_notes (i3notes, i3, i3, newi2pat ? i2 : NULL,
elim_i2, elim_i1, elim_i0);
if (i2notes)
  distribute_notes (i2notes, i2, i3, newi2pat ? i2 : NULL,
elim_i2, elim_i1, elim_i0);
if (i1notes)
  distribute_notes (i1notes, i1, i3, newi2pat ? i2 : NULL,
elim_i2, local_elim_i1, local_elim_i0);
if (i0notes)
  distribute_notes (i0notes, i0, i3, newi2pat ? i2 : NULL,
elim_i2, elim_i1, local_elim_i0);
if (midnotes)
  distribute_notes (midnotes, NULL, i3, newi2pat ? i2 : NULL,
elim_i2, elim_i1, elim_i0);
--cut here--

where the compiler distributes REG_UNUSED note from i2:

   22: {r120:QI=r104:QI^0x1;clobber flags:CC;}
  REG_UNUSED flags:CC

via distribute_notes() using the following:

--cut here--
  /* Otherwise, if this register is used by I3, then this register
 now dies here, so we must put a REG_DEAD note here unless there
 is one already.  */
  else if (reg_referenced_p (XEXP (note, 0), PATTERN (i3))
   && ! (REG_P (XEXP (note, 0))
 ? find_regno_note (i3, REG_DEAD,
REGNO (XEXP (note, 0)))
 : find_reg_note (i3, REG_DEAD, XEXP (note, 0
{
  PUT_REG_NOTE_KIND (note, REG_DEAD);
  place = i3;
}
--cut here--

Flags register is used in I3, but there already is a REG_DEAD note in I3.
The above condition doesn't trigger and continues in the "else" part where
REG_DEAD note is put to I2.  The proposed solution corrects the above
logic to trigger every time the register is referenced in I3, avoiding the
"else" part.

PR rtl-optimization/118739

gcc/ChangeLog:

* combine.cc (distribute_notes) : Correct the
logic when the register is used by I3.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr118739.c: New test.

(cherry picked from commit a92dc3fe31c95d56019b2fb95a58414bca06241f)

Diff:
---
 gcc/combine.cc   | 15 +-
 gcc/testsuite/gcc.target/i386/pr118739.c | 50 
 2 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index a40b295bd0db..8ce8e0696ff0 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -14485,14 +14485,15 @@ distribute_notes (rtx notes, rtx_insn *from_insn, 
rtx_insn *i3, rtx_insn *i2,
  /* Otherwise, if this register is used by I3, then this register
 now dies here, so we must put a REG_DEAD note here unless there
 is one already.  */
- else if (reg_referenced_p (XEXP (note, 0), PATTERN (i3))
-  && ! (REG_P (XEXP (note, 0))
-? find_regno_note (i3, REG_DEAD,
-   REGNO (XEXP (note, 0)))
-: find_reg_note (i3, REG_DEAD, XEXP (note, 0
+ else if (reg_referenced_p (XEXP (note, 0), PATTERN (i3)))
{
- PUT_REG_NOTE_KIND (note, REG_DEAD);
- place = i3;
+ if (! (REG_P (XEXP (note, 0))
+? find_regno_note (i3, REG_DEAD, REGNO (XEXP (note, 0)))
+: find_reg_note (i3, REG_DEAD, XEXP (note, 0
+   {
+ PUT_REG_NOTE_KIND (note, REG_DEAD);
+ place = i3;
+   }
}
 
 

[gcc r15-7813] Break false dependency chain on Zen5

2025-03-04 Thread Jan Hubicka via Gcc-cvs
https://gcc.gnu.org/g:8c4a00f9a48f1b2af10448c9f2058b44b8cb7234

commit r15-7813-g8c4a00f9a48f1b2af10448c9f2058b44b8cb7234
Author: Jan Hubicka 
Date:   Tue Mar 4 16:22:01 2025 +0100

Break false dependency chain on Zen5

Zen5 on some variants has false dependency on tzcnt, blsi, blsr and blsmsk
instructions.  Those can be tested by the following benchmark

jh@shroud:~> cat ee.c
int
main()
{
   int a = 10;
   int b = 0;
   for (int i = 0; i < 10; i++)
   {
   asm volatile ("xor %0, %0": "=r" (b));
   asm volatile (INST " %2, %0": "=r"(b): "0"(b),"r"(a));
   asm volatile (INST " %2, %0": "=r"(b): "0"(b),"r"(a));
   asm volatile (INST " %2, %0": "=r"(b): "0"(b),"r"(a));
   asm volatile (INST " %2, %0": "=r"(b): "0"(b),"r"(a));
   asm volatile (INST " %2, %0": "=r"(b): "0"(b),"r"(a));
   asm volatile (INST " %2, %0": "=r"(b): "0"(b),"r"(a));
   asm volatile (INST " %2, %0": "=r"(b): "0"(b),"r"(a));
   asm volatile (INST " %2, %0": "=r"(b): "0"(b),"r"(a));
   asm volatile (INST " %2, %0": "=r"(b): "0"(b),"r"(a));
   asm volatile (INST " %2, %0": "=r"(b): "0"(b),"r"(a));
   }
   return 0;
}
jh@shroud:~> cat bmk.sh
gcc ee.c -DBREAK -DINST=\"$1\" -O2 ; time ./a.out ; gcc ee.c -DINST=\"$1\" 
-O2 ; time ./a.out
jh@shroud:~> sh bmk.sh tzcnt

real0m0.886s
user0m0.886s
sys 0m0.000s

real0m0.886s
user0m0.886s
sys 0m0.000s

jh@shroud:~> sh bmk.sh blsi

real0m0.979s
user0m0.979s
sys 0m0.000s

real0m2.418s
user0m2.418s
sys 0m0.000s

jh@shroud:~> sh bmk.sh blsr

real0m0.986s
user0m0.986s
sys 0m0.000s

real0m2.422s
user0m2.421s
sys 0m0.000s
jh@shroud:~> sh bmk.sh blsmsk

real0m0.973s
user0m0.973s
sys 0m0.000s

real0m2.422s
user0m2.422s
sys 0m0.000s

We already have a tunable that controls tzcnt together with lzcnt and popcnt.
Since it seems that only tzcnt is affected I added a new tunable to control
tzcnt only.  I also added splitters for blsi/blsr/blsmsk implemented
analogously to the existing splitter for lzcnt.

The patch is neutral on SPEC.  We produce blsi and blsr in some internal
loops, but they usually have the same destination as source.  However, it is
good to break the dependency chain to avoid pathological cases and it is
quite cheap overall, so I think we want to enable this for generic.  I will
send a followup patch for this.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

gcc/ChangeLog:

* config/i386/i386.h (TARGET_AVOID_FALSE_DEP_FOR_TZCNT): New macro.
(TARGET_AVOID_FALSE_DEP_FOR_BLS): New macro.
* config/i386/i386.md (*bmi_blsi_): Add splitter for false
dependency.
(*bmi_blsi__ccno): Add splitter for false dependency.
(*bmi_blsi__falsedep): New pattern.
(*bmi_blsmsk_): Add splitter for false dependency.
(*bmi_blsmsk__falsedep): New pattern.
(*bmi_blsr_): Add splitter for false dependency.
(*bmi_blsr__cmp): Add splitter for false dependency
(*bmi_blsr__cmp_falsedep): New pattern.
* config/i386/x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_TZCNT): 
New tune.
(X86_TUNE_AVOID_FALSE_DEP_FOR_BLS): New tune.

gcc/testsuite/ChangeLog:

* gcc.target/i386/blsi.c: New test.
* gcc.target/i386/blsmsk.c: New test.
* gcc.target/i386/blsr.c: New test.

Diff:
---
 gcc/config/i386/i386.h |   4 +
 gcc/config/i386/i386.md| 168 +++--
 gcc/config/i386/x86-tune.def   |  10 ++
 gcc/testsuite/gcc.target/i386/blsi.c   |  26 +
 gcc/testsuite/gcc.target/i386/blsmsk.c |   9 ++
 gcc/testsuite/gcc.target/i386/blsr.c   |  26 +
 6 files changed, 233 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2696bfb3a81e..ce29c272bc0b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -461,6 +461,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 ix86_tune_features[X86_TUNE_ADJUST_UNROLL]
 #define TARGET_AVOID_FALSE_DEP_FOR_BMI \
ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
+#define TARGET_AVOID_FALSE_DEP_FOR_TZCNT \
+   ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_TZCNT]
+#define TARGET_AVOID_FALSE_DEP_FOR_BLS \
+   ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BLS]
 #define TARGET_ONE_IF_CONV_INSN \
ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
 #define TARGET_AVOID_MFENCE ix86_t

[gcc r15-7814] __builtin_bswapXX: improve docs

2025-03-04 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:5452b50a4ef4236d7d78210a9c16331ae86e67e3

commit r15-7814-g5452b50a4ef4236d7d78210a9c16331ae86e67e3
Author: Oscar Gustafsson 
Date:   Tue Mar 4 15:29:30 2025 +

__builtin_bswapXX: improve docs

gcc/ChangeLog:

* doc/extend.texi: Improve example for __builtin_bswap16.

Diff:
---
 gcc/doc/extend.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index ec9bb59900c9..83f6e45170be 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -16338,7 +16338,7 @@ Returns the first argument raised to the power of the 
second.  Unlike the
 
 @defbuiltin{uint16_t __builtin_bswap16 (uint16_t @var{x})}
 Returns @var{x} with the order of the bytes reversed; for example,
-@code{0xaabb} becomes @code{0xbbaa}.  Byte here always means
+@code{0xabcd} becomes @code{0xcdab}.  Byte here always means
 exactly 8 bits.
 @enddefbuiltin


[gcc r15-7815] testsuite: arm: Use effective-target for pr68674.c test

2025-03-04 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:879fd9c822633ecf2c62471d1a7f9b9619e296b7

commit r15-7815-g879fd9c822633ecf2c62471d1a7f9b9619e296b7
Author: Torbjörn SVENSSON 
Date:   Fri Nov 8 18:39:32 2024 +0100

testsuite: arm: Use effective-target for pr68674.c test

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr68674.c: Use effective-target arm_arch_v7a
and arm_libc_fp_abi.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/gcc.target/arm/pr68674.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr68674.c 
b/gcc/testsuite/gcc.target/arm/pr68674.c
index 0b3237458fe9..3fd562d05185 100644
--- a/gcc/testsuite/gcc.target/arm/pr68674.c
+++ b/gcc/testsuite/gcc.target/arm/pr68674.c
@@ -1,9 +1,10 @@
 /* PR target/68674 */
 /* { dg-do compile } */
-/* { dg-require-effective-target arm_neon_ok } */
-/* { dg-require-effective-target arm_fp_ok } */
+/* { dg-require-effective-target arm_arch_v7a_ok } */
+/* { dg-require-effective-target arm_libc_fp_abi_ok } */
 /* { dg-options "-O2" } */
-/* { dg-add-options arm_fp } */
+/* { dg-add-options arm_arch_v7a } */
+/* { dg-add-options arm_libc_fp_abi } */
 
 #pragma GCC target ("fpu=vfp")


[gcc r15-7811] Make ix86_macro_fusion_pair_p and ix86_fuse_mov_alu_p match current CPUs

2025-03-04 Thread Jan Hubicka via Gcc-cvs
https://gcc.gnu.org/g:c84be624e079cd748df93a3dc0b5168865fefee9

commit r15-7811-gc84be624e079cd748df93a3dc0b5168865fefee9
Author: Jan Hubicka 
Date:   Mon Mar 3 19:12:20 2025 +0100

Make ix86_macro_fusion_pair_p and ix86_fuse_mov_alu_p match current CPUs

The current implementation of fusion predicates misses some common
fusion cases on zen and more recent cores.  I added knobs for
individual conditionals we test.

 1) I split checks for fusing ALU with conditional operands when the ALU
 has a memory operand.  This seems to be supported by zen3+ and by
 tigerlake and cooperlake (according to Agner Fog's manual)

 2) znver4 and 5 support fusion of ALU and conditional even if ALU has
memory and immediate operands.
This seems to be relatively important, enabling 25% more fusions on
gcc bootstrap.

 3) no CPU supports fusing when ALU contains IP relative memory
references.  I added a separate knob so we do not forget about this if
this gets supported later.

The patch does not solve the limitation of sched that fuse pairs must be
adjacent on input and the first operation must be single-set.  Fixing
single-set is easy (I have separate patch for this), for non-adjacent
pairs we need bigger surgery.

To verify what CPU really does I made a simple test script.

jh@ryzen3:~> cat fuse-test.c
int b;
const int z = 0;
const int o = 1;
int
main()
{
int a = 10;
int b;
int z = 0;
int o = 1;
asm volatile ("\n"
".L1234:\n"
"nop\n"
"subl   %3, %0\n"

"movl %0, %1\n"
"cmpl %2, %1\n"
"movl %0, %1\n"
"test %1, %1\n"

"nop\n"
"jne.L1234":"=a"(a),
"=m"(b)
"=r"(b)
:
"m"(z),
"m"(o),
"i"(0),
"i"(1),
"0"(a)
);
}
jh@ryzen3:~> cat fuse-test.sh
EVENT=ex_ret_fused_instr
dotest()
{
gcc -O2  fuse-test.c $* -o fuse-cmp-imm-mem-nofuse
perf stat -e $EVENT ./fuse-cmp-imm-mem-nofuse  2>&1 | grep $EVENT
gcc -O2 fuse-test.c -DFUSE $* -o fuse-cmp-imm-mem-fuse
perf stat  -e $EVENT ./fuse-cmp-imm-mem-fuse 2>&1 | grep $EVENT
}

echo ALU with immediate
dotest
echo ALU with memory
dotest -D MEM
echo ALU with IP relative memory
dotest -D MEM -D IPRELATIVE
echo CMP with immediate
dotest -D CMP
echo CMP with memory
dotest -D CMP -D MEM
echo CMP with memory and immediate
dotest -D CMP -D MEMIMM
echo CMP with IP relative memory
dotest -D CMP -D MEM -D IPRELATIVE
echo TEST
dotest -D TEST

On zen5 I get:
ALU with immediate
20,345  ex_ret_fused_instr:u
 1,000,020,278  ex_ret_fused_instr:u
ALU with memory
20,367  ex_ret_fused_instr:u
 1,000,020,290  ex_ret_fused_instr:u
ALU with IP relative memory
20,395  ex_ret_fused_instr:u
20,403  ex_ret_fused_instr:u
CMP with immediate
20,369  ex_ret_fused_instr:u
 1,000,020,301  ex_ret_fused_instr:u
CMP with memory
20,314  ex_ret_fused_instr:u
 1,000,020,341  ex_ret_fused_instr:u
CMP with memory and immediate
20,372  ex_ret_fused_instr:u
 1,000,020,266  ex_ret_fused_instr:u
CMP with IP relative memory
20,382  ex_ret_fused_instr:u
20,369  ex_ret_fused_instr:u
TEST
20,346  ex_ret_fused_instr:u
 1,000,020,301  ex_ret_fused_instr:u

IP relative memory seems to not be documented.

On zen3/4 I get:

ALU with immediate
20,263  ex_ret_fused_instr:u
 1,000,020,051  ex_ret_fused_instr:u
ALU with memory
20,255  ex_ret_fused_instr:u
 1,000,020,056  ex_ret_fused_instr:u
ALU with IP relative memory
20,253  ex_ret_fused_instr:u
20,266  ex_ret_fused_instr:u
CMP with immediate
20,264  ex_ret_fused_instr:u
 1,000,020,052  ex_ret_fused_instr:u
CMP with memory
20,253  ex_ret_fused_instr:u
 1,000,019,794  ex_ret_fused_instr:u
CMP with memory and immediate
20,260  ex_ret_fused_instr:u
20,264  ex_ret_fused_instr:u
CMP with IP relative memory
20,258  ex_ret_fused_instr:u
20,256  ex_ret_fused_instr:u
TEST
20,261  ex_ret_fused_instr:u
 1,000,020,048  ex_ret_fused_instr:u

zen1 and 2 gets:

ALU with immediate
2

[gcc r15-7812] Fortran: Fix gimplification error on assignment to pointer [PR103391]

2025-03-04 Thread Andre Vehreschild via Gcc-cvs
https://gcc.gnu.org/g:04909c7ecc023874c3444b85f88c60b7b7cc7778

commit r15-7812-g04909c7ecc023874c3444b85f88c60b7b7cc7778
Author: Andre Vehreschild 
Date:   Tue Mar 4 12:56:20 2025 +0100

Fortran: Fix gimplification error on assignment to pointer [PR103391]

PR fortran/103391

gcc/fortran/ChangeLog:

* trans-expr.cc (gfc_trans_assignment_1): Do not use poly assign
for pointer arrays on lhs (as it is done for allocatables
already).

gcc/testsuite/ChangeLog:

* gfortran.dg/assign_12.f90: New test.

Diff:
---
 gcc/fortran/trans-expr.cc   | 16 
 gcc/testsuite/gfortran.dg/assign_12.f90 | 28 
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 0d790b63f956..fbe7333fd711 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -12876,14 +12876,14 @@ gfc_trans_assignment_1 (gfc_expr * expr1, gfc_expr * 
expr2, bool init_flag,
  needed.  */
   lhs_attr = gfc_expr_attr (expr1);
 
-  is_poly_assign = (use_vptr_copy || lhs_attr.pointer
-   || (lhs_attr.allocatable && !lhs_attr.dimension))
-  && (expr1->ts.type == BT_CLASS
-  || gfc_is_class_array_ref (expr1, NULL)
-  || gfc_is_class_scalar_expr (expr1)
-  || gfc_is_class_array_ref (expr2, NULL)
-  || gfc_is_class_scalar_expr (expr2))
-  && lhs_attr.flavor != FL_PROCEDURE;
+  is_poly_assign
+= (use_vptr_copy
+   || ((lhs_attr.pointer || lhs_attr.allocatable) && !lhs_attr.dimension))
+  && (expr1->ts.type == BT_CLASS || gfc_is_class_array_ref (expr1, NULL)
+ || gfc_is_class_scalar_expr (expr1)
+ || gfc_is_class_array_ref (expr2, NULL)
+ || gfc_is_class_scalar_expr (expr2))
+  && lhs_attr.flavor != FL_PROCEDURE;
 
   assoc_assign = is_assoc_assign (expr1, expr2);
 
diff --git a/gcc/testsuite/gfortran.dg/assign_12.f90 
b/gcc/testsuite/gfortran.dg/assign_12.f90
new file mode 100644
index ..be31021f24c6
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/assign_12.f90
@@ -0,0 +1,28 @@
+!{ dg-do run }
+!
+! Check assignment works for derived types to memory referenced by pointer
+! Contributed by G. Steinmetz  
+
+program pr103391
+   type t
+ character(1) :: c
+   end type
+   type t2
+  type(t), pointer :: a(:)
+   end type
+
+   type(t), target :: arr(2)
+   type(t2) :: r
+
+   arr = [t('a'), t('b')]
+
+   r = f([arr])
+   if (any(r%a(:)%c /= ['a', 'b'])) stop 1
+contains
+   function f(x)
+  class(t), intent(in), target :: x(:)
+  type(t2) :: f
+  allocate(f%a(size(x,1)))
+  f%a = x
+   end
+end


[gcc r14-11382] c++: generic lambda, implicit 'this' capture, xobj memfn [PR119038]

2025-03-04 Thread Patrick Palka via Gcc-cvs
https://gcc.gnu.org/g:2ed37595a3c5cf246e8eeea337cf3071c4bb3a59

commit r14-11382-g2ed37595a3c5cf246e8eeea337cf3071c4bb3a59
Author: Patrick Palka 
Date:   Fri Feb 28 10:56:49 2025 -0500

c++: generic lambda, implicit 'this' capture, xobj memfn [PR119038]

When a generic lambda calls an overload set containing an iobj member
function we speculatively capture 'this'.  We need to do the same
for an xobj member function.

PR c++/119038

gcc/cp/ChangeLog:

* lambda.cc (maybe_generic_this_capture): Consider xobj
member functions as well, not just iobj.  Update function
comment.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/explicit-obj-lambda15.C: New test.

Reviewed-by: Jason Merrill 
(cherry picked from commit 1a150f1f688486b12cd975bdc4cd1bd52a7e0110)

Diff:
---
 gcc/cp/lambda.cc   |  7 +++
 gcc/testsuite/g++.dg/cpp23/explicit-obj-lambda15.C | 11 +++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/gcc/cp/lambda.cc b/gcc/cp/lambda.cc
index 5b5e31c141e3..3f69b1fb1264 100644
--- a/gcc/cp/lambda.cc
+++ b/gcc/cp/lambda.cc
@@ -950,9 +950,8 @@ maybe_resolve_dummy (tree object, bool add_capture_p)
 /* When parsing a generic lambda containing an argument-dependent
member function call we defer overload resolution to instantiation
time.  But we have to know now whether to capture this or not.
-   Do that if FNS contains any non-static fns.
-   The std doesn't anticipate this case, but I expect this to be the
-   outcome of discussion.  */
+   Do that if FNS contains any non-static fns as per
+   [expr.prim.lambda.capture]/7.1.  */
 
 void
 maybe_generic_this_capture (tree object, tree fns)
@@ -971,7 +970,7 @@ maybe_generic_this_capture (tree object, tree fns)
for (lkp_iterator iter (fns); iter; ++iter)
  if (((!id_expr && TREE_CODE (*iter) != USING_DECL)
   || TREE_CODE (*iter) == TEMPLATE_DECL)
- && DECL_IOBJ_MEMBER_FUNCTION_P (*iter))
+ && DECL_OBJECT_MEMBER_FUNCTION_P (*iter))
{
  /* Found a non-static member.  Capture this.  */
  lambda_expr_this_capture (lam, /*maybe*/-1);
diff --git a/gcc/testsuite/g++.dg/cpp23/explicit-obj-lambda15.C 
b/gcc/testsuite/g++.dg/cpp23/explicit-obj-lambda15.C
new file mode 100644
index ..369f0895ed10
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/explicit-obj-lambda15.C
@@ -0,0 +1,11 @@
+// PR c++/119038
+// { dg-do compile { target c++23 } }
+
+struct A {
+  void f() {
+[&](auto x) { g(x); h(x); }(0);
+  }
+
+  void g(this A&, int);
+  void h(this auto&, auto);
+};


[gcc r15-7816] AVR: Add texi @subsubsection "AVR Optimization Options".

2025-03-04 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:9ee39fcb15bd6ebd636ee65599b34a4c0d0818e4

commit r15-7816-g9ee39fcb15bd6ebd636ee65599b34a4c0d0818e4
Author: Georg-Johann Lay 
Date:   Sun Mar 2 16:30:11 2025 +0100

AVR: Add texi @subsubsection "AVR Optimization Options".

gcc/
* doc/invoke.texi (AVR Optimization Options): New @subsubsection
for pure optimization options.

Diff:
---
 gcc/doc/invoke.texi | 175 
 1 file changed, 93 insertions(+), 82 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6f8bf3923863..6f0779b900cb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -24353,33 +24353,6 @@ instructions.  This option has only an effect on 
reduced Tiny devices like
 ATtiny40.  See also the @code{absdata}
 @ref{AVR Variable Attributes,variable attribute}.
 
-@opindex maccumulate-args
-@item -maccumulate-args
-Accumulate outgoing function arguments and acquire/release the needed
-stack space for outgoing function arguments once in function
-prologue/epilogue.  Without this option, outgoing arguments are pushed
-before calling a function and popped afterwards.
-
-Popping the arguments after the function call can be expensive on
-AVR so that accumulating the stack space might lead to smaller
-executables because arguments need not be removed from the
-stack after such a function call.
-
-This option can lead to reduced code size for functions that perform
-several calls to functions that get their arguments on the stack like
-calls to printf-like functions.
-
-@opindex mbranch-cost
-@item -mbranch-cost=@var{cost}
-Set the branch costs for conditional branch instructions to
-@var{cost}.  Reasonable values for @var{cost} are small, non-negative
-integers. The default branch cost is 0.
-
-@opindex mcall-prologues
-@item -mcall-prologues
-Functions prologues/epilogues are expanded as calls to appropriate
-subroutines.  Code size is smaller.
-
 @opindex mcvt
 @item -mcvt
 Use a @emph{compact vector table}.  Some devices support a CVT
@@ -24397,27 +24370,6 @@ For example, you can link with 
@code{-Wl,--defsym,__init_cvt=0}.
 The CVT startup code is available since
 @w{@uref{https://github.com/avrdudes/avr-libc/issues/1010,AVR-LibC v2.3}}.
 
-@opindex mfuse-add
-@item -mfuse-add
-@itemx -mno-fuse-add
-@itemx -mfuse-add=@var{level}
-Optimize indirect memory accesses on reduced Tiny devices.
-The default uses @code{@var{level}=1} for optimizations @option{-Og}
-and @option{-O1}, and @code{@var{level}=2} for higher optimizations.
-Valid values for @var{level} are @code{0}, @code{1} and @code{2}.
-
-@opindex mfuse-move
-@item -mfuse-move
-@itemx -mno-fuse-move
-@itemx -mfuse-move=@var{level}
-Run a post reload optimization pass that tries to fuse move instructions
-and to split multi-byte instructions into 8-bit operations.
-The default uses @code{@var{level}=3} for optimization @option{-O1},
-and @code{@var{level}=23} for higher optimizations.
-Valid values for @var{level} are in the range @code{0} @dots{} @code{23}
-which is a 3:2:2:2 mixed radix value.  Each digit controls some
-aspect of the optimization.
-
 @opindex mdouble
 @opindex mlong-double
 @item -mdouble=@var{bits}
@@ -24506,39 +24458,6 @@ support 
(@w{@uref{https://sourceware.org/PR31124,PR31124}}) is available.
 In that case, @option{-mrodata-in-ram} can be used to return to the old
 layout with @code{.rodata} in RAM.
 
-@opindex mstrict-X
-@item -mstrict-X
-Use address register @code{X} in a way proposed by the hardware.  This means
-that @code{X} is only used in indirect, post-increment or
-pre-decrement addressing.
-
-Without this option, the @code{X} register may be used in the same way
-as @code{Y} or @code{Z} which then is emulated by additional
-instructions.
-For example, loading a value with @code{X+const} addressing with a
-small non-negative @code{const < 64} to a register @var{Rn} is
-performed as
-
-@example
-adiw r26, const   ; X += const
-ld   @var{Rn}, X; @var{Rn} = *X
-sbiw r26, const   ; X -= const
-@end example
-
-@opindex msplit-bit-shift
-@item -msplit-bit-shift
-Split multi-byte shifts with a constant offset into a shift with
-a byte offset and a residual shift with a non-byte offset.
-This optimization is turned on per default for @option{-O2} and higher,
-including @option{-Os} but excluding @option{-Oz}.
-Splitting of shifts with a constant offset that is
-a multiple of 8 is controlled by @option{-mfuse-move}.
-
-@opindex msplit-ldst
-@item -msplit-ldst
-Split multi-byte loads and stores into several byte loads and stores.
-This optimization is turned on per default for @option{-O2} and higher.
-
 @opindex mtiny-stack
 @item -mtiny-stack
 Only change the lower 8@tie{}bits of the stack pointer.
@@ -24590,6 +24509,98 @@ Warn if the ISR is misspelled, i.e.@: without __vector 
prefix.
 Enabled by default.
 @end table
 
+
+@subsubsection AVR Optimization Options
+The following options are pure optimization options.
+Options @option

[gcc r15-7817] C prototypes for external arguments; add warning for mismatch.

2025-03-04 Thread Thomas Koenig via Gcc-cvs
https://gcc.gnu.org/g:21ca9153ebe525b077ac96811cfd48be6b259e7e

commit r15-7817-g21ca9153ebe525b077ac96811cfd48be6b259e7e
Author: Thomas Koenig 
Date:   Tue Mar 4 20:13:19 2025 +0100

C prototypes for external arguments; add warning for mismatch.

The problem was that we were not handling external dummy arguments
with -fc-prototypes-external. In looking at this, I found that we
were not warning about external procedures with different argument
lists.  This can actually be legal (see the two test cases) but
creates a problem for the C prototypes: If we have something like

subroutine foo(a,n)
  external a
  if (n == 1) call a(1)
  if (n == 2) call a(2,3)
end subroutine foo

then, pre-C23, we could just have written out the prototype as

void foo_ (void (*a) (), int *n);

but this is illegal in C23. What to do?  I finally chose to warn
about the argument mismatch, with a new option. Warn only because the
code above is legal, but include in -Wall because such code seems highly
suspect.  This option is also implied in -fc-prototypes-external. I also
put a warning in the generated header file in that case, so users
have a chance to see what is going on (especially since gcc now
defaults to C23).

gcc/fortran/ChangeLog:

PR fortran/119049
PR fortran/119074
* dump-parse-tree.cc (seen_conflict): New static variable.
(gfc_dump_external_c_prototypes): Initialize it. If it was
set, write out a warning that -std=c23 will not work.
(write_proc): Move the work of actually writing out the
formal arglist to...
(write_formal_arglist): New function. Handle external dummy
parameters and their argument lists. If there were mismatched
arguments, output an empty argument list in pre-C23 style.
* gfortran.h (struct gfc_symbol): Add ext_dummy_arglist_mismatch
flag and formal_at.
* invoke.texi: Document -Wexternal-argument-mismatch.
* lang.opt: Put it in.
* resolve.cc (resolve_function): If warning about external
argument mismatches, build a formal from actual arglist the
first time around, and later compare and warn.
(resolve_call): Likewise.

gcc/testsuite/ChangeLog:

PR fortran/119049
PR fortran/119074
* gfortran.dg/interface_55.f90: New test.
* gfortran.dg/interface_56.f90: New test.

Diff:
---
 gcc/fortran/dump-parse-tree.cc | 114 +++--
 gcc/fortran/gfortran.h |   8 ++
 gcc/fortran/invoke.texi|  10 +++
 gcc/fortran/lang.opt   |   4 +
 gcc/fortran/resolve.cc |  63 
 gcc/testsuite/gfortran.dg/interface_55.f90 |  26 +++
 gcc/testsuite/gfortran.dg/interface_56.f90 |  32 
 7 files changed, 220 insertions(+), 37 deletions(-)

diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc
index 7726b708ad89..1a15757b57be 100644
--- a/gcc/fortran/dump-parse-tree.cc
+++ b/gcc/fortran/dump-parse-tree.cc
@@ -4108,6 +4108,8 @@ gfc_dump_c_prototypes (FILE *file)
 
 /* Loop over all external symbols, writing out their declarations.  */
 
+static bool seen_conflict;
+
 void
 gfc_dump_external_c_prototypes (FILE * file)
 {
@@ -4119,6 +4121,7 @@ gfc_dump_external_c_prototypes (FILE * file)
 return;
 
   dumpfile = file;
+  seen_conflict = false;
   fprintf (dumpfile,
   _("/* Prototypes for external procedures generated from %s\n"
 "   by GNU Fortran %s%s.\n\n"
@@ -4130,6 +4133,11 @@ gfc_dump_external_c_prototypes (FILE * file)
 return;
 
   gfc_traverse_gsymbol (gfc_gsym_root, show_external_symbol, (void *) &bind_c);
+  if (seen_conflict)
+fprintf (dumpfile,
+_("\n\n/* WARNING: Because of differing arguments to an external\n"
+  "   procedure, this header file is not compatible with -std=c23."
+  "\n\n   Use another -std option to compile.  */\n"));
 }
 
 /* Callback function for dumping external symbols, be they BIND(C) or
@@ -4406,52 +4414,35 @@ write_variable (gfc_symbol *sym)
   fputs (";\n", dumpfile);
 }
 
-
-/* Write out a procedure, including its arguments.  */
 static void
-write_proc (gfc_symbol *sym, bool bind_c)
+write_formal_arglist (gfc_symbol *sym, bool bind_c)
 {
-  const char *pre, *type_name, *post;
-  bool asterisk;
-  enum type_return rok;
   gfc_formal_arglist *f;
-  const char *sym_name;
-  const char *intent_in;
-  bool external_character;
-
-  external_character =  sym->ts.type == BT_CHARACTER && !bind_c;
-
-  if (sym->binding_label)
-sym_name = sym->binding_label;
-  else
-sym_name = sym->name;
-
-  if (sym->ts.type == BT_UNKNOWN || external_character)
-{
-  fprintf (dumpfile, "void ");
-  fputs 

[gcc r15-7810] c++: ICE with RANGE_EXPR and array init [PR109431]

2025-03-04 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:173cf7c9b8c0d61bb2cb0bd3a9e3150b393ab59a

commit r15-7810-g173cf7c9b8c0d61bb2cb0bd3a9e3150b393ab59a
Author: Marek Polacek 
Date:   Thu Feb 27 17:42:49 2025 -0500

c++: ICE with RANGE_EXPR and array init [PR109431]

We crash because we generate

  {[0 ... 1]={.low=0, .high=1}, [1]={.low=0, .high=1}}

which output_constructor_regular_field doesn't want to see.  This
happens since r9-1483: process_init_constructor_array can now create
a RANGE_EXPR.  But the bug isn't in that patch; the problem is that
build_vec_init doesn't handle RANGE_EXPRs.

build_vec_init has a FOR_EACH_CONSTRUCTOR_ELT loop which populates
const_vec.  In this case it loops over the elements of

  {[0 ... 1]={.low=0, .high=1}}

but assumes that each element initializes one element.  So after the
loop num_initialized_elts was 1, and then below:

  HOST_WIDE_INT last = tree_to_shwi (maxindex);
  if (num_initialized_elts <= last)
{
  tree field = size_int (num_initialized_elts);
  if (num_initialized_elts != last)
field = build2 (RANGE_EXPR, sizetype, field,
size_int (last));
  CONSTRUCTOR_APPEND_ELT (const_vec, field, e);
}

we added the extra initializer.

It seemed convenient to use range_expr_nelts like below.

PR c++/109431

gcc/cp/ChangeLog:

* cp-tree.h (range_expr_nelts): Declare.
* init.cc (build_vec_init): If the CONSTRUCTOR's index is a
RANGE_EXPR, use range_expr_nelts to count how many elements
were initialized.

gcc/testsuite/ChangeLog:

* g++.dg/init/array67.C: New test.

Reviewed-by: Jason Merrill 

Diff:
---
 gcc/cp/cp-tree.h|  1 +
 gcc/cp/init.cc  |  5 -
 gcc/testsuite/g++.dg/init/array67.C | 29 +
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index c76a92dd39be..583d04963642 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -8621,6 +8621,7 @@ extern tree mangle_decomp (tree, 
vec &);
 extern void mangle_module_substitution (int);
 extern int mangle_module_component (tree id, bool partition);
 extern tree mangle_module_global_init  (int);
+extern unsigned HOST_WIDE_INT range_expr_nelts (tree);
 
 /* in dump.cc */
 extern bool cp_dump_tree   (void *, tree);
diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index f77da4267cbd..ce6e58e05f2f 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -4802,7 +4802,10 @@ build_vec_init (tree base, tree maxindex, tree init,
  tree baseref = build1 (INDIRECT_REF, type, base);
  tree one_init;
 
- num_initialized_elts++;
+ if (field && TREE_CODE (field) == RANGE_EXPR)
+   num_initialized_elts += range_expr_nelts (field);
+ else
+   num_initialized_elts++;
 
  /* We need to see sub-array TARGET_EXPR before cp_fold_r so we can
 handle cleanup flags properly.  */
diff --git a/gcc/testsuite/g++.dg/init/array67.C 
b/gcc/testsuite/g++.dg/init/array67.C
new file mode 100644
index ..8aee8fd481a6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/array67.C
@@ -0,0 +1,29 @@
+// PR c++/109431
+// { dg-do run { target c++11 } }
+
+struct RangeLimits
+{
+int low = 0;
+int high = 1;
+constexpr RangeLimits() { }
+};
+
+template 
+int parameterLimits(void)
+{
+static RangeLimits constexpr param_limits[2] = {};
+if (param_limits[0].low != 0
+   || param_limits[0].high != 1
+   || param_limits[1].low != 0
+   || param_limits[1].high != 1)
+  __builtin_abort ();
+auto const &limits = param_limits[1];
+return 0;
+}
+
+auto s = parameterLimits<1>();
+
+int
+main ()
+{
+}


[gcc r14-11378] testsuite: arm: Use effective-target for pr68674.c test

2025-03-04 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:a42ef0938f38b05919d8586883150819ee0ce757

commit r14-11378-ga42ef0938f38b05919d8586883150819ee0ce757
Author: Torbjörn SVENSSON 
Date:   Fri Nov 8 18:39:32 2024 +0100

testsuite: arm: Use effective-target for pr68674.c test

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr68674.c: Use effective-target arm_arch_v7a
and arm_libc_fp_abi.

Signed-off-by: Torbjörn SVENSSON 
(cherry picked from commit 879fd9c822633ecf2c62471d1a7f9b9619e296b7)

Diff:
---
 gcc/testsuite/gcc.target/arm/pr68674.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr68674.c 
b/gcc/testsuite/gcc.target/arm/pr68674.c
index 0b3237458fe9..3fd562d05185 100644
--- a/gcc/testsuite/gcc.target/arm/pr68674.c
+++ b/gcc/testsuite/gcc.target/arm/pr68674.c
@@ -1,9 +1,10 @@
 /* PR target/68674 */
 /* { dg-do compile } */
-/* { dg-require-effective-target arm_neon_ok } */
-/* { dg-require-effective-target arm_fp_ok } */
+/* { dg-require-effective-target arm_arch_v7a_ok } */
+/* { dg-require-effective-target arm_libc_fp_abi_ok } */
 /* { dg-options "-O2" } */
-/* { dg-add-options arm_fp } */
+/* { dg-add-options arm_arch_v7a } */
+/* { dg-add-options arm_libc_fp_abi } */
 
 #pragma GCC target ("fpu=vfp")


[gcc r14-11381] aarch64: Add missing simd requirements for INS [PR118531]

2025-03-04 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:df9c10d18b5b1323efb5f7823c31a259859d87a4

commit r14-11381-gdf9c10d18b5b1323efb5f7823c31a259859d87a4
Author: Richard Sandiford 
Date:   Tue Mar 4 17:49:31 2025 +

aarch64: Add missing simd requirements for INS [PR118531]

In g:b096a6ebe9d9f9fed4c105f6555f724eb32af95c I'd forgotten
to gate some uses of INS on TARGET_SIMD.

gcc/
PR target/118531
* config/aarch64/aarch64.md (*insv_reg_)
(*aarch64_bfi_)
(*aarch64_bfidi_subreg_): Add missing
simd requirements.

gcc/testsuite/
* gcc.target/aarch64/ins_bitfield_1a.c: New test.
* gcc.target/aarch64/ins_bitfield_3a.c: Likewise.
* gcc.target/aarch64/ins_bitfield_5a.c: Likewise.

(cherry picked from commit 1b8820421488d220a95f651b51175d618063c48c)

Diff:
---
 gcc/config/aarch64/aarch64.md  | 9 ++---
 gcc/testsuite/gcc.target/aarch64/ins_bitfield_1a.c | 8 
 gcc/testsuite/gcc.target/aarch64/ins_bitfield_3a.c | 8 
 gcc/testsuite/gcc.target/aarch64/ins_bitfield_5a.c | 8 
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index dbde066f7478..a08523a2b074 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6130,7 +6130,8 @@
   return "ins\t%0.[%1], %2.[0]";
 return "ins\t%0.[%1], %w2";
   }
-  [(set_attr "type" "bfm,neon_ins_q,neon_ins_q")]
+  [(set_attr "type" "bfm,neon_ins_q,neon_ins_q")
+   (set_attr "arch" "*,simd,simd")]
 )
 
 (define_insn "*insv_reg"
@@ -6163,7 +6164,8 @@
 operands[2] = lowpart_subreg (mode, operands[2],
  mode);
   }
-  [(set_attr "type" "bfm,neon_ins_q,neon_ins_q")]
+  [(set_attr "type" "bfm,neon_ins_q,neon_ins_q")
+   (set_attr "arch" "*,simd,simd")]
 )
 
 (define_insn "*aarch64_bfi4"
@@ -6195,7 +6197,8 @@
   {
 operands[2] = lowpart_subreg (DImode, operands[3], mode);
   }
-  [(set_attr "type" "bfm,neon_ins_q,neon_ins_q")]
+  [(set_attr "type" "bfm,neon_ins_q,neon_ins_q")
+   (set_attr "arch" "*,simd,simd")]
 )
 
 ;;  Match a bfi instruction where the shift of OP3 means that we are
diff --git a/gcc/testsuite/gcc.target/aarch64/ins_bitfield_1a.c 
b/gcc/testsuite/gcc.target/aarch64/ins_bitfield_1a.c
new file mode 100644
index ..028d4aa1e891
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ins_bitfield_1a.c
@@ -0,0 +1,8 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps" } */
+
+#pragma GCC target "+nosimd"
+
+#include "ins_bitfield_1.c"
+
+/* { dg-final { scan-assembler-not {\tins\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ins_bitfield_3a.c 
b/gcc/testsuite/gcc.target/aarch64/ins_bitfield_3a.c
new file mode 100644
index ..1c153667a8d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ins_bitfield_3a.c
@@ -0,0 +1,8 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps" } */
+
+#pragma GCC target "+nosimd"
+
+#include "ins_bitfield_3.c"
+
+/* { dg-final { scan-assembler-not {\tins\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ins_bitfield_5a.c 
b/gcc/testsuite/gcc.target/aarch64/ins_bitfield_5a.c
new file mode 100644
index ..f6bdde97f987
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ins_bitfield_5a.c
@@ -0,0 +1,8 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps" } */
+
+#pragma GCC target "+nosimd"
+
+#include "ins_bitfield_5.c"
+
+/* { dg-final { scan-assembler-not {\tins\t} } } */


[gcc r14-11380] Fix folding of BIT_NOT_EXPR for POLY_INT_CST [PR118976]

2025-03-04 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:aa8793daa4ec110ae1e8fa240614651711b93fe4

commit r14-11380-gaa8793daa4ec110ae1e8fa240614651711b93fe4
Author: Richard Sandiford 
Date:   Tue Mar 4 17:49:30 2025 +

Fix folding of BIT_NOT_EXPR for POLY_INT_CST [PR118976]

There was an embarrassing typo in the folding of BIT_NOT_EXPR for
POLY_INT_CSTs: it used - rather than ~ on the poly_int.  Not sure
how that happened, but it might have been due to the way that
~x is implemented as -1 - x internally.

gcc/
PR tree-optimization/118976
* fold-const.cc (const_unop): Use ~ rather than - for BIT_NOT_EXPR.
* config/aarch64/aarch64.cc (aarch64_test_sve_folding): New 
function.
(aarch64_run_selftests): Run it.

(cherry picked from commit 78380fd7f743e23dfdf013d68a2f0347e1511550)

Diff:
---
 gcc/config/aarch64/aarch64.cc | 11 +++
 gcc/fold-const.cc |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 32adc2fa9854..0495adc7dd37 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -30544,6 +30544,16 @@ aarch64_test_sysreg_encoding_clashes (void)
 }
 }
 
+/* Test SVE arithmetic folding.  */
+
+static void
+aarch64_test_sve_folding ()
+{
+  tree res = fold_unary (BIT_NOT_EXPR, ssizetype,
+ssize_int (poly_int64 (1, 1)));
+  ASSERT_TRUE (operand_equal_p (res, ssize_int (poly_int64 (-2, -1))));
+}
+
 /* Run all target-specific selftests.  */
 
 static void
@@ -30552,6 +30562,7 @@ aarch64_run_selftests (void)
   aarch64_test_loading_full_dump ();
   aarch64_test_fractional_cost ();
   aarch64_test_sysreg_encoding_clashes ();
+  aarch64_test_sve_folding ();
 }
 
 } // namespace selftest
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 0d8942deedd3..7427caace7c9 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -1953,7 +1953,7 @@ const_unop (enum tree_code code, tree type, tree arg0)
   if (TREE_CODE (arg0) == INTEGER_CST)
return fold_not_const (arg0, type);
   else if (POLY_INT_CST_P (arg0))
-   return wide_int_to_tree (type, -poly_int_cst_value (arg0));
+   return wide_int_to_tree (type, ~poly_int_cst_value (arg0));
   /* Perform BIT_NOT_EXPR on each element individually.  */
   else if (TREE_CODE (arg0) == VECTOR_CST)
{


[gcc r15-7822] c++: Apply/diagnose attributes when instantiating ARRAY/POINTER/REFERENCE_TYPE [PR118787]

2025-03-04 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:1853b02d8c127740055242123db2d32cf9476ea9

commit r15-7822-g1853b02d8c127740055242123db2d32cf9476ea9
Author: Jakub Jelinek 
Date:   Wed Mar 5 06:41:00 2025 +0100

c++: Apply/diagnose attributes when instantiating 
ARRAY/POINTER/REFERENCE_TYPE [PR118787]

The following testcase IMO in violation of the P2552R3 paper doesn't
pedwarn on alignas applying to dependent types or alignas with dependent
argument.

tsubst was just ignoring TYPE_ATTRIBUTES.

The following patch fixes it for the POINTER/REFERENCE_TYPE and
ARRAY_TYPE cases, but perhaps we need to do the same also for other
types (INTEGER_TYPE/REAL_TYPE and the like).  I guess I'll need to
construct more testcases.

2025-03-05  Jakub Jelinek  

PR c++/118787
* pt.cc (tsubst) <case POINTER_TYPE, case REFERENCE_TYPE>: Use return t;
only if it doesn't have any TYPE_ATTRIBUTES.  Call
apply_late_template_attributes.
<case ARRAY_TYPE>: Likewise.  Formatting fix.

* g++.dg/cpp0x/alignas22.C: New test.

Diff:
---
 gcc/cp/pt.cc   | 24 +++-
 gcc/testsuite/g++.dg/cpp0x/alignas22.C | 23 +++
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 62d91a2dd159..c09a934580f2 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -16865,7 +16865,9 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
 case POINTER_TYPE:
 case REFERENCE_TYPE:
   {
-   if (type == TREE_TYPE (t) && TREE_CODE (type) != METHOD_TYPE)
+   if (type == TREE_TYPE (t)
+   && TREE_CODE (type) != METHOD_TYPE
+   && TYPE_ATTRIBUTES (t) == NULL_TREE)
  return t;
 
/* [temp.deduct]
@@ -16935,9 +16937,9 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
 A,' while an attempt to create the type type rvalue reference to
 cv T' creates the type T"
  */
- r = cp_build_reference_type
- (TREE_TYPE (type),
-  TYPE_REF_IS_RVALUE (t) && TYPE_REF_IS_RVALUE (type));
+ r = cp_build_reference_type (TREE_TYPE (type),
+  TYPE_REF_IS_RVALUE (t)
+  && TYPE_REF_IS_RVALUE (type));
else
  r = cp_build_reference_type (type, TYPE_REF_IS_RVALUE (t));
r = cp_build_qualified_type (r, cp_type_quals (t), complain);
@@ -16946,6 +16948,11 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
  /* Will this ever be needed for TYPE_..._TO values?  */
  layout_type (r);
 
+   if (!apply_late_template_attributes (&r, TYPE_ATTRIBUTES (t),
+/*flags=*/0,
+args, complain, in_decl))
+ return error_mark_node;
+
return r;
   }
 case OFFSET_TYPE:
@@ -17020,7 +17027,9 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
 
/* As an optimization, we avoid regenerating the array type if
   it will obviously be the same as T.  */
-   if (type == TREE_TYPE (t) && domain == TYPE_DOMAIN (t))
+   if (type == TREE_TYPE (t)
+   && domain == TYPE_DOMAIN (t)
+   && TYPE_ATTRIBUTES (t) == NULL_TREE)
  return t;
 
/* These checks should match the ones in create_array_type_for_decl.
@@ -17059,6 +17068,11 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
TYPE_USER_ALIGN (r) = 1;
  }
 
+   if (!apply_late_template_attributes (&r, TYPE_ATTRIBUTES (t),
+/*flags=*/0,
+args, complain, in_decl))
+ return error_mark_node;
+
return r;
   }
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/alignas22.C 
b/gcc/testsuite/g++.dg/cpp0x/alignas22.C
new file mode 100644
index ..e7929774c3c0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/alignas22.C
@@ -0,0 +1,23 @@
+// PR c++/118787
+// { dg-do compile { target c++11 } }
+// { dg-options "-pedantic" }
+
+template 
+void foo (T & alignas (N));// { dg-warning "'alignas' on a type 
other than class" }
+template 
+void bar (T (&)[N] alignas (N));   // { dg-warning "'alignas' on a type 
other than class" }
+template 
+using U = T * alignas (N); // { dg-warning "'alignas' on a type 
other than class" }
+template 
+using V = T[N] alignas (N);// { dg-warning "'alignas' on a type 
other than class" }
+
+void
+baz ()
+{
+  int x alignas (4) = 0;
+  foo  (x);
+  int y alignas (4) [4];
+  bar  (y);
+  U  u;
+  V  v;
+}


[gcc r15-7819] c++: C++23 range-for temps and ?: [PR119073]

2025-03-04 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:f2a7f845698bfb7aaded9ffacd7046ae25a8b1fc

commit r15-7819-gf2a7f845698bfb7aaded9ffacd7046ae25a8b1fc
Author: Jason Merrill 
Date:   Tue Mar 4 16:36:47 2025 -0500

c++: C++23 range-for temps and ?: [PR119073]

Here gimplification got confused because extend_temps_r messed up the types
of the arms of a COND_EXPR.

PR c++/119073

gcc/cp/ChangeLog:

* call.cc (extend_temps_r): Preserve types of COND_EXPR arms.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/range-for39.C: New test.

Diff:
---
 gcc/cp/call.cc   |  2 +-
 gcc/testsuite/g++.dg/cpp0x/range-for39.C | 12 
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index be9b0cf62f10..f7b4cccb1c7a 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -14902,7 +14902,7 @@ extend_temps_r (tree *tp, int *walk_subtrees, void 
*data)
  {
tree set = build2 (MODIFY_EXPR, boolean_type_node,
   cur_cond_guard, boolean_true_node);
-   op = add_stmt_to_compound (set, op);
+   op = cp_build_compound_expr (set, op, tf_none);
  }
   };
   walk_arm (TREE_OPERAND (*tp, 1));
diff --git a/gcc/testsuite/g++.dg/cpp0x/range-for39.C 
b/gcc/testsuite/g++.dg/cpp0x/range-for39.C
new file mode 100644
index ..ebb6acafe7f3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/range-for39.C
@@ -0,0 +1,12 @@
+// PR c++/119073
+// { dg-do compile { target c++11 } }
+
+struct A { ~A (); };
+struct B { B (const A &a = A ()); int *begin (); int *end (); ~B (); };
+
+void
+foo (bool x)
+{
+  for (auto i : (x ? B{} : B{}))
+;
+}


[gcc r15-7824] openmp, c++: Fix up OpenMP/OpenACC handling in C++ modules [PR119102]

2025-03-04 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:ddeb70548c81f5dba91f281290584698897151d8

commit r15-7824-gddeb70548c81f5dba91f281290584698897151d8
Author: Jakub Jelinek 
Date:   Wed Mar 5 07:47:52 2025 +0100

openmp, c++: Fix up OpenMP/OpenACC handling in C++ modules [PR119102]

module.cc apparently has support for extensions and attempts to ensure
that if a module is compiled with those extensions enabled, sources which
use the module are compiled with the same extensions.
The only extension supported is SE_OPENMP right now.
And the use of the extension is keyed on streaming out or in OMP_CLAUSE
tree.
This is undesirable for several reasons.
OMP_CLAUSE is the only tree which can appear in the IL even without
-fopenmp/-fopenmp-simd/-fopenacc (when simd ("notinbranch") or
simd ("inbranch") attributes are used), and it can appear also in all
the 3 modes mentioned above.  On the other side, with the exception of
arguments of attributes added e.g. for declare simd where no harm should
be done if -fopenmp/-fopenmp-simd isn't enabled later on, OMP_CLAUSE appears
in OMP_*_CLAUSES of OpenMP/OpenACC construct trees.  And those construct
trees often have no clauses at all, so keying the extension on OMP_CLAUSE
doesn't catch many cases that should be caught.
Furthermore, for OpenMP we have 2 modes, -fopenmp-simd which parses some
OpenMP but constructs from that mostly OMP_SIMD and a few other cases,
and -fopenmp which includes that and far more on top of that; and there is
also -fopenacc.

So, this patch stops setting/requesting the extension on OMP_CLAUSE,
introduces 3 extensions rather than one (SE_OPENMP_SIMD, SE_OPENMP and
SE_OPENACC) and keys those on OpenMP constructs from the -fopenmp-simd
subset, other OpenMP constructs and OpenACC constructs.

2025-03-05  Jakub Jelinek  

PR c++/119102
gcc/cp/
* module.cc (enum streamed_extensions): Add SE_OPENMP_SIMD
and SE_OPENACC, change value of SE_OPENMP and SE_BITS.
(CASE_OMP_SIMD_CODE, CASE_OMP_CODE, CASE_OACC_CODE): Define.
(trees_out::start): Don't set SE_OPENMP extension for OMP_CLAUSE.
Set SE_OPENMP_SIMD extension for CASE_OMP_SIMD_CODE, SE_OPENMP
for CASE_OMP_CODE and SE_OPENACC for CASE_OACC_CODE.
(trees_in::start): Don't fail for OMP_CLAUSE with missing
SE_OPENMP extension.  Do fail for CASE_OMP_SIMD_CODE and missing
SE_OPENMP_SIMD extension, or CASE_OMP_CODE and missing SE_OPENMP
extension, or CASE_OACC_CODE and missing SE_OPENACC extension.
(module_state::write_readme): Write all of SE_OPENMP_SIMD, SE_OPENMP
and SE_OPENACC extensions.
(module_state::read_config): Diagnose missing -fopenmp, 
-fopenmp-simd
and/or -fopenacc depending on extensions used.
gcc/testsuite/
* g++.dg/modules/pr119102_a.H: New test.
* g++.dg/modules/pr119102_b.C: New test.
* g++.dg/modules/omp-3_a.C: New test.
* g++.dg/modules/omp-3_b.C: New test.
* g++.dg/modules/omp-3_c.C: New test.
* g++.dg/modules/omp-3_d.C: New test.
* g++.dg/modules/oacc-1_a.C: New test.
* g++.dg/modules/oacc-1_b.C: New test.
* g++.dg/modules/oacc-1_c.C: New test.

Diff:
---
 gcc/cp/module.cc  | 111 ++
 gcc/testsuite/g++.dg/modules/oacc-1_a.C   |  15 
 gcc/testsuite/g++.dg/modules/oacc-1_b.C   |  11 +++
 gcc/testsuite/g++.dg/modules/oacc-1_c.C   |   9 +++
 gcc/testsuite/g++.dg/modules/omp-3_a.C|  14 
 gcc/testsuite/g++.dg/modules/omp-3_b.C|  11 +++
 gcc/testsuite/g++.dg/modules/omp-3_c.C|  11 +++
 gcc/testsuite/g++.dg/modules/omp-3_d.C|   9 +++
 gcc/testsuite/g++.dg/modules/pr119102_a.H |   9 +++
 gcc/testsuite/g++.dg/modules/pr119102_b.C |   9 +++
 10 files changed, 197 insertions(+), 12 deletions(-)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 59716e1873e9..8b0f42951c24 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -3613,8 +3613,10 @@ void slurping::release_macros ()
 /* Flags for extensions that end up being streamed.  */
 
 enum streamed_extensions {
-  SE_OPENMP = 1 << 0,
-  SE_BITS = 1
+  SE_OPENMP_SIMD = 1 << 0,
+  SE_OPENMP = 1 << 1,
+  SE_OPENACC = 1 << 2,
+  SE_BITS = 3
 };
 
 /* Counter indices.  */
@@ -5276,6 +5278,53 @@ trees_in::tree_list (bool has_purpose)
 
   return res;
 }
+
+#define CASE_OMP_SIMD_CODE \
+case OMP_SIMD: \
+case OMP_STRUCTURED_BLOCK: \
+case OMP_LOOP: \
+case OMP_ORDERED:  \
+case OMP_TILE: \
+case OMP_UNROLL
+#define CASE_OMP_CODE \
+case OMP_PARALLEL: \
+case OMP_TASK: \
+case OMP_FOR:  \
+case OMP_DISTRIBUTE:

[gcc r15-7806] simplify-rtx: Fix up simplify_logical_relational_operation [PR119002]

2025-03-04 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:1ff01a88c484775fe8b5f1ca46fa24dfe0b14f3d

commit r15-7806-g1ff01a88c484775fe8b5f1ca46fa24dfe0b14f3d
Author: Richard Sandiford 
Date:   Tue Mar 4 10:44:34 2025 +

simplify-rtx: Fix up simplify_logical_relational_operation [PR119002]

The following testcase is miscompiled on powerpc64le-linux starting with
r15-6777.  During combine we see:

(set (reg:SI 134)
(ior:SI (ge:SI (reg:CCFP 128)
(const_int 0 [0]))
(lt:SI (reg:CCFP 128)
(const_int 0 [0]))))

The simplify_logical_relational_operation code (in its current form)
was written with arithmetic rather than CC modes in mind.  Since CCFP
is a CC mode, it fails the HONOR_NANS check, and so the function assumes
that ge | lt => true.

If one comparison is unsigned then it should be safe to assume that
the other comparison is also unsigned, even for CC modes, since the
optimisation checks that the comparisons are between the same operands.
For the other cases, we can only safely fold comparisons of CC mode
values if the result is always-true (15) or always-false (0).

It turns out that the original testcase for PR117186, which ran at -O,
was relying on the old behaviour for some of the functions.  It needs
4-instruction combinations, and so -fexpensive-optimizations, to pass
in its intended form.

gcc/
PR rtl-optimization/119002
* simplify-rtx.cc
(simplify_context::simplify_logical_relational_operation): Handle
comparisons between CC values.  If there is no evidence that the
CC values are unsigned, restrict the fold to always-true or
always-false results.

gcc/testsuite/
* gcc.c-torture/execute/ieee/pr119002.c: New test.
* gcc.target/aarch64/pr117186.c: Run at -O2 rather than -O.

Co-authored-by: Jakub Jelinek 

Diff:
---
 gcc/simplify-rtx.cc| 12 +--
 .../gcc.c-torture/execute/ieee/pr119002.c  | 23 ++
 gcc/testsuite/gcc.target/aarch64/pr117186.c|  2 +-
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index c478bd060fc6..fe007bc7d96a 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -2655,6 +2655,9 @@ simplify_context::simplify_logical_relational_operation 
(rtx_code code,
 
   enum rtx_code code0 = GET_CODE (op0);
   enum rtx_code code1 = GET_CODE (op1);
+  machine_mode cmp_mode = GET_MODE (XEXP (op0, 0));
+  if (cmp_mode == VOIDmode)
+cmp_mode = GET_MODE (XEXP (op0, 1));
 
   /* Assume at first that the comparisons are on integers, and that the
  operands are therefore ordered.  */
@@ -2672,8 +2675,10 @@ simplify_context::simplify_logical_relational_operation 
(rtx_code code,
 }
   else
 {
-  /* See whether the operands might be unordered.  */
-  if (HONOR_NANS (GET_MODE (XEXP (op0, 0))))
+  /* See whether the operands might be unordered.  Assume that all
+results are possible for CC modes, and punt later if we don't get an
+always-true or always-false answer.  */
+  if (GET_MODE_CLASS (cmp_mode) == MODE_CC || HONOR_NANS (cmp_mode))
all = 15;
   mask0 = comparison_to_mask (code0) & all;
   mask1 = comparison_to_mask (code1) & all;
@@ -2702,6 +2707,9 @@ simplify_context::simplify_logical_relational_operation 
(rtx_code code,
 code = mask_to_unsigned_comparison (mask);
   else
 {
+  if (GET_MODE_CLASS (cmp_mode) == MODE_CC)
+   return 0;
+
   code = mask_to_comparison (mask);
   /* LTGT and NE are arithmetically equivalent for ordered operands,
 with NE being the canonical choice.  */
diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/pr119002.c 
b/gcc/testsuite/gcc.c-torture/execute/ieee/pr119002.c
new file mode 100644
index ..af1a705f1707
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/ieee/pr119002.c
@@ -0,0 +1,23 @@
+/* PR rtl-optimization/119002 */
+
+__attribute__((noipa)) unsigned int
+foo (void *x, float y, float z)
+{
+  unsigned int a, b;
+  float c, d, e;
+  c = y;
+  d = z;
+  a = c < d;
+  d = y;
+  e = z;
+  b = d >= e;
+  a |= b;
+  return a;
+}
+
+int
+main ()
+{
+  if (foo ((void *) 0, 0.f, __builtin_nanf ("")))
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/pr117186.c 
b/gcc/testsuite/gcc.target/aarch64/pr117186.c
index afe3c25a4930..3090f2c11d53 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr117186.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr117186.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O" } */
+/* { dg-options "-O2" } */
 /* { dg-final { check-function-bodies "**" "" "" } } */
 
 /*


[gcc r15-7807] Fix folding of BIT_NOT_EXPR for POLY_INT_CST [PR118976]

2025-03-04 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:78380fd7f743e23dfdf013d68a2f0347e1511550

commit r15-7807-g78380fd7f743e23dfdf013d68a2f0347e1511550
Author: Richard Sandiford 
Date:   Tue Mar 4 10:44:35 2025 +

Fix folding of BIT_NOT_EXPR for POLY_INT_CST [PR118976]

There was an embarrassing typo in the folding of BIT_NOT_EXPR for
POLY_INT_CSTs: it used - rather than ~ on the poly_int.  Not sure
how that happened, but it might have been due to the way that
~x is implemented as -1 - x internally.

gcc/
PR tree-optimization/118976
* fold-const.cc (const_unop): Use ~ rather than - for BIT_NOT_EXPR.
* config/aarch64/aarch64.cc (aarch64_test_sve_folding): New 
function.
(aarch64_run_selftests): Run it.

Diff:
---
 gcc/config/aarch64/aarch64.cc | 11 +++
 gcc/fold-const.cc |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index fe76730b0a7c..af3871ce8a1f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -31336,6 +31336,16 @@ aarch64_test_sysreg_encoding_clashes (void)
 }
 }
 
+/* Test SVE arithmetic folding.  */
+
+static void
+aarch64_test_sve_folding ()
+{
+  tree res = fold_unary (BIT_NOT_EXPR, ssizetype,
+ssize_int (poly_int64 (1, 1)));
+  ASSERT_TRUE (operand_equal_p (res, ssize_int (poly_int64 (-2, -1))));
+}
+
 /* Run all target-specific selftests.  */
 
 static void
@@ -31344,6 +31354,7 @@ aarch64_run_selftests (void)
   aarch64_test_loading_full_dump ();
   aarch64_test_fractional_cost ();
   aarch64_test_sysreg_encoding_clashes ();
+  aarch64_test_sve_folding ();
 }
 
 } // namespace selftest
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index f9f7f4d2f917..fef7a6cc48ef 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -1964,7 +1964,7 @@ const_unop (enum tree_code code, tree type, tree arg0)
   if (TREE_CODE (arg0) == INTEGER_CST)
return fold_not_const (arg0, type);
   else if (POLY_INT_CST_P (arg0))
-   return wide_int_to_tree (type, -poly_int_cst_value (arg0));
+   return wide_int_to_tree (type, ~poly_int_cst_value (arg0));
   /* Perform BIT_NOT_EXPR on each element individually.  */
   else if (TREE_CODE (arg0) == VECTOR_CST)
{


[gcc r15-7802] tree-optimization/119096 - bogus conditional reduction vectorization

2025-03-04 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:10e4107dfcf9fe324d0902f16411a75c596dab91

commit r15-7802-g10e4107dfcf9fe324d0902f16411a75c596dab91
Author: Richard Biener 
Date:   Mon Mar 3 14:12:37 2025 +0100

tree-optimization/119096 - bogus conditional reduction vectorization

When we vectorize a .COND_ADD reduction and apply the single-use-def
cycle optimization we can end up choosing the wrong else value for
subsequent .COND_ADD.  The following rectifies this.

PR tree-optimization/119096
* tree-vect-loop.cc (vect_transform_reduction): Use the
correct else value for .COND_fn.

* gcc.dg/vect/pr119096.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr119096.c | 21 +
 gcc/tree-vect-loop.cc|  2 +-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr119096.c 
b/gcc/testsuite/gcc.dg/vect/pr119096.c
new file mode 100644
index 000000000000..2c03a5936831
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr119096.c
@@ -0,0 +1,21 @@
+#include "tree-vect.h"
+
+long __attribute__((noipa))
+sum(int* A, int* B)
+{
+long total = 0;
+for(int j = 0; j < 16; j++)
+if((A[j] > 0) & (B[j] > 0))
+total += (long)A[j];
+return total;
+}
+int main()
+{
+  int A[16] = { 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1 };
+  int B[16] = { };
+  check_vect ();
+  if (sum (A, B) != 0)
+abort ();
+  return 0;
+}
+
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index dc15b955aadf..52533623cab9 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -9064,7 +9064,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
new_stmt = gimple_build_call_internal (internal_fn (code),
   op.num_ops,
   vop[0], vop[1], vop[2],
-  vop[1]);
+  vop[reduc_index]);
  else
new_stmt = gimple_build_assign (vec_dest, tree_code (op.code),
vop[0], vop[1], vop[2]);


[gcc r15-7805] testsuite: Add tests for already fixed PR [PR119071]

2025-03-04 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:ccf9db9a6fa4b5bc7aad5e9603e2ac71984142a0

commit r15-7805-gccf9db9a6fa4b5bc7aad5e9603e2ac71984142a0
Author: Jakub Jelinek 
Date:   Tue Mar 4 09:52:22 2025 +0100

testsuite: Add tests for already fixed PR [PR119071]

Uros' r15-7793 fixed this PR as well, I'm just committing tests
from the PR so that it can be closed.

2025-03-04  Jakub Jelinek  

PR rtl-optimization/119071
* gcc.dg/pr119071.c: New test.
* gcc.c-torture/execute/pr119071.c: New test.

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr119071.c | 15 +
 gcc/testsuite/gcc.dg/pr119071.c| 45 ++
 2 files changed, 60 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr119071.c 
b/gcc/testsuite/gcc.c-torture/execute/pr119071.c
new file mode 100644
index 000000000000..91f29cce3d55
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr119071.c
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/119071 */
+
+int a, b;
+
+int
+main ()
+{
+  int c = 0;
+  if (a + 2)
+c = 1;
+  int d = (1 + c - 2 + c == 1) - 1;
+  b = ((d + 1) << d) + d;
+  if (b != 1)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/pr119071.c b/gcc/testsuite/gcc.dg/pr119071.c
new file mode 100644
index 000000000000..ade1d288d2ae
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr119071.c
@@ -0,0 +1,45 @@
+/* PR rtl-optimization/119071 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fgimple" } */
+
+int a, b;
+
+int __GIMPLE (ssa,startwith("expand"))
+foo (void)
+{
+  int _1;
+  int _2;
+  int _3;
+  int _5;
+  _Bool _7;
+  int _8;
+  int _9;
+  _Bool _14;
+  int _15;
+  int _16;
+  _Bool _17;
+  int _18;
+
+  __BB(2):
+  _1 = a;
+  _17 = _1 != _Literal (int) -2;
+  _18 = (int) _17;
+  _2 = _18 + _Literal (int) -1;
+  _3 = _2 + _18;
+  _14 = _3 != 1;
+  _15 = (int) _14;
+  _16 = -_15;
+  _7 = _3 == 1;
+  _9 = (int) _7;
+  _5 = _9 << _16;
+  _8 = _5 - _15;
+  b = _8;
+  return _8;
+}
+
+int
+main ()
+{
+  if (foo () != 1)
+__builtin_abort ();
+}


[gcc r15-7808] libstdc++: Remove stray comma in testing docs

2025-03-04 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:ac16d6d74fcb4ca10c939b00782b4dfada666273

commit r15-7808-gac16d6d74fcb4ca10c939b00782b4dfada666273
Author: Jonathan Wakely 
Date:   Tue Mar 4 11:13:23 2025 +

libstdc++: Remove stray comma in testing docs

libstdc++-v3/ChangeLog:

* doc/xml/manual/test.xml: Remove stray comma.
* doc/html/manual/test.html: Regenerate.

Diff:
---
 libstdc++-v3/doc/html/manual/test.html | 2 +-
 libstdc++-v3/doc/xml/manual/test.xml   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/doc/html/manual/test.html 
b/libstdc++-v3/doc/html/manual/test.html
index 1c7af1193daf..4b295e583f61 100644
--- a/libstdc++-v3/doc/html/manual/test.html
+++ b/libstdc++-v3/doc/html/manual/test.html
@@ -230,7 +230,7 @@ cat 27_io/objects/char/3_xin.in | a.out
 
   The testsuite will create a number of files in the directory in
-  which you run this command,.  Some of those files might use the
+  which you run this command.  Some of those files might use the
   same name as files created by other testsuites (like the ones
   for GCC and G++), so you should not try to run all the
   testsuites in parallel from the same directory.
diff --git a/libstdc++-v3/doc/xml/manual/test.xml 
b/libstdc++-v3/doc/xml/manual/test.xml
index 6b7f1b04a2ac..963e3e135009 100644
--- a/libstdc++-v3/doc/xml/manual/test.xml
+++ b/libstdc++-v3/doc/xml/manual/test.xml
@@ -381,7 +381,7 @@ cat 27_io/objects/char/3_xin.in | a.out
 
 
   The testsuite will create a number of files in the directory in
-  which you run this command,.  Some of those files might use the
+  which you run this command.  Some of those files might use the
   same name as files created by other testsuites (like the ones
   for GCC and G++), so you should not try to run all the
   testsuites in parallel from the same directory.


[gcc r15-7809] aarch64: force operand to fresh register to avoid subreg issues [PR118892]

2025-03-04 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:d883f3233c4b7e0dce52539a12df8aff43e4

commit r15-7809-gd883f3233c4b7e0dce52539a12df8aff43e4
Author: Tamar Christina 
Date:   Tue Mar 4 11:15:26 2025 +

aarch64: force operand to fresh register to avoid subreg issues [PR118892]

When the input is already a subreg and we try to make a paradoxical
subreg out of it for copysign this can fail if it violates the subreg
relationship.

Use force_lowpart_subreg instead of lowpart_subreg to then force the
results to a register instead of ICEing.

gcc/ChangeLog:

PR target/118892
* config/aarch64/aarch64.md (copysign3): Use
force_lowpart_subreg instead of lowpart_subreg.

gcc/testsuite/ChangeLog:

PR target/118892
* gcc.target/aarch64/copysign-pr118892.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64.md|  2 +-
 gcc/testsuite/gcc.target/aarch64/copysign-pr118892.c | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index cfe730f3732c..b10059e4f580 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7480,7 +7480,7 @@
 {
   emit_insn (gen_ior3 (
lowpart_subreg (mode, operands[0], mode),
-   lowpart_subreg (mode, operands[1], mode),
+   force_lowpart_subreg (mode, operands[1], mode),
v_bitmask));
   DONE;
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign-pr118892.c 
b/gcc/testsuite/gcc.target/aarch64/copysign-pr118892.c
new file mode 100644
index 000000000000..adfa30dc3e2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign-pr118892.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+double l();
+double f()
+{
+  double t6[2] = {l(), l()};
+  double t7[2];
+  __builtin_memcpy(&t7, &t6, sizeof(t6));
+  return -__builtin_fabs(t7[1]);
+}


[gcc r15-7818] libgo: bump libgo version for GCC 15 release

2025-03-04 Thread Ian Lance Taylor via Gcc-cvs
https://gcc.gnu.org/g:8d7762945bba3c8321b5856cee3d5e5aed55facf

commit r15-7818-g8d7762945bba3c8321b5856cee3d5e5aed55facf
Author: Ian Lance Taylor 
Date:   Mon Mar 3 19:39:30 2025 -0800

libgo: bump libgo version for GCC 15 release

For PR go/119098

Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/654477

Diff:
---
 gcc/go/gofrontend/MERGE | 2 +-
 libgo/configure | 2 +-
 libgo/configure.ac  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 7c93a2e91233..b6fdf72dcec4 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-96014b17d9a846d1d878ac4732c2baaf5ee8b2d2
+f5c453aa726ebb509e7b8cb20df7734f0e411404
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/configure b/libgo/configure
index c0d0a1560f59..b1a2228fa1b4 100755
--- a/libgo/configure
+++ b/libgo/configure
@@ -2611,7 +2611,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 ac_config_headers="$ac_config_headers config.h"
 
 
-libtool_VERSION=23:0:0
+libtool_VERSION=24:0:0
 
 
 # Default to --enable-multilib
diff --git a/libgo/configure.ac b/libgo/configure.ac
index 898091276f71..0b05551aacb2 100644
--- a/libgo/configure.ac
+++ b/libgo/configure.ac
@@ -10,7 +10,7 @@ AC_INIT(package-unused, version-unused,, libgo)
 AC_CONFIG_SRCDIR(Makefile.am)
 AC_CONFIG_HEADER(config.h)
 
-libtool_VERSION=23:0:0
+libtool_VERSION=24:0:0
 AC_SUBST(libtool_VERSION)
 
 AM_ENABLE_MULTILIB(, ..)


[gcc r14-11379] arm: xfail gcc.target/arm/unsigned-extend-1.c for thumb1

2025-03-04 Thread Torbjorn Svensson via Gcc-cvs
https://gcc.gnu.org/g:7fb1d7bff18d318de00021765e1e12b0d56e7ac1

commit r14-11379-g7fb1d7bff18d318de00021765e1e12b0d56e7ac1
Author: Richard Earnshaw 
Date:   Mon Mar 3 15:30:58 2025 +

arm: xfail gcc.target/arm/unsigned-extend-1.c for thumb1

Partial backport of 2a502f9e4c5c6a8e908ef1b0b5c03fb2e4bd4390.

gcc/testsuite:
* gcc.target/arm/unsigned-extend-1.c: Expand check for any
insn suggesting a zero-extend.  XFAIL for thumb1 code.

Signed-off-by: Torbjörn SVENSSON 

Diff:
---
 gcc/testsuite/gcc.target/arm/unsigned-extend-1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 
b/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c
index 3b4ab048fb09..fa3d34400bfa 100644
--- a/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c
+++ b/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c
@@ -5,5 +5,5 @@ unsigned char foo (unsigned char c)
 {
   return (c >= '0') && (c <= '9');
 }
-
-/* { dg-final { scan-assembler-not "uxtb" } } */
+/* We shouldn't need any zero-extension idioms here.  */
+/* { dg-final { scan-assembler-not "\t(uxtb|and|lsr|lsl)" { xfail arm_thumb1 } 
} } */


[gcc r15-7821] LoongArch: Fix incorrect reorder of __lsx_vldx and __lasx_xvldx [PR119084]

2025-03-04 Thread Xi Ruoyao via Gcc-cvs
https://gcc.gnu.org/g:4856292f7a680ec478e7607f1b71781996d7d542

commit r15-7821-g4856292f7a680ec478e7607f1b71781996d7d542
Author: Xi Ruoyao 
Date:   Sun Mar 2 19:02:50 2025 +0800

LoongArch: Fix incorrect reorder of __lsx_vldx and __lasx_xvldx [PR119084]

They could be incorrectly reordered with store instructions like st.b
because the RTL expression does not have a memory_operand or a (mem)
expression.  The incorrect reorder has been observed in openh264 LTO
build.

Expand them to a (mem) expression instead of unspec to fix the issue.
Then we need to make loongarch_address_insns return 1 for
ADDRESS_REG_REG because the constraint "R" expects this behavior, or
the vldx instruction will be considered invalid by the register
allocate pass and turned to add.d + vld.  Apply the ADDRESS_REG_REG
penalty in loongarch_address_cost instead, loongarch_rtx_costs should
also call loongarch_address_cost instead of loongarch_address_insns
then.

Closes: https://github.com/cisco/openh264/issues/3857

gcc/ChangeLog:

PR target/119084
* config/loongarch/lasx.md (UNSPEC_LASX_XVLDX): Remove.
(lasx_xvldx): Remove.
* config/loongarch/lsx.md (UNSPEC_LSX_VLDX): Remove.
(lsx_vldx): Remove.
* config/loongarch/simd.md (QIVEC): New define_mode_iterator.
(_vldx): New define_expand.
* config/loongarch/loongarch.cc (loongarch_address_insns_1): New
static function with most logic factored out from ...
(loongarch_address_insns): ... here.  Call
loongarch_address_insns_1 with reg_reg_cost = 1.
(loongarch_address_cost): Call loongarch_address_insns_1 with
reg_reg_cost = la_addr_reg_reg_cost.

gcc/testsuite/ChangeLog:

PR target/119084
* gcc.target/loongarch/pr119084.c: New test.

Diff:
---
 gcc/config/loongarch/lasx.md  | 13 
 gcc/config/loongarch/loongarch.cc | 48 ---
 gcc/config/loongarch/lsx.md   | 13 
 gcc/config/loongarch/simd.md  |  9 +
 gcc/testsuite/gcc.target/loongarch/pr119084.c | 24 ++
 5 files changed, 61 insertions(+), 46 deletions(-)

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index e4505c1660d1..43e3ab0026ab 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -119,7 +119,6 @@
   UNSPEC_LASX_XVSSRLRN
   UNSPEC_LASX_XVEXTL_QU_DU
   UNSPEC_LASX_XVLDI
-  UNSPEC_LASX_XVLDX
   UNSPEC_LASX_XVSTX
   UNSPEC_LASX_VECINIT_MERGE
   UNSPEC_LASX_VEC_SET_INTERNAL
@@ -3579,18 +3578,6 @@
   [(set_attr "type" "simd_load")
(set_attr "mode" "V4DI")])
 
-(define_insn "lasx_xvldx"
-  [(set (match_operand:V32QI 0 "register_operand" "=f")
-   (unspec:V32QI [(match_operand:DI 1 "register_operand" "r")
-  (match_operand:DI 2 "reg_or_0_operand" "rJ")]
- UNSPEC_LASX_XVLDX))]
-  "ISA_HAS_LASX"
-{
-  return "xvldx\t%u0,%1,%z2";
-}
-  [(set_attr "type" "simd_load")
-   (set_attr "mode" "V32QI")])
-
 (define_insn "lasx_xvstx"
   [(set (mem:V32QI (plus:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "reg_or_0_operand" "rJ")))
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index f2177f892fba..68f5d8584765 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2363,14 +2363,9 @@ loongarch_index_address_p (rtx addr, machine_mode mode 
ATTRIBUTE_UNUSED)
   return true;
 }
 
-/* Return the number of instructions needed to load or store a value
-   of mode MODE at address X.  Return 0 if X isn't valid for MODE.
-   Assume that multiword moves may need to be split into word moves
-   if MIGHT_SPLIT_P, otherwise assume that a single load or store is
-   enough.  */
-
-int
-loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
+static int
+loongarch_address_insns_1 (rtx x, machine_mode mode, bool might_split_p,
+  int reg_reg_cost)
 {
   struct loongarch_address_info addr;
   int factor;
@@ -2405,7 +2400,7 @@ loongarch_address_insns (rtx x, machine_mode mode, bool 
might_split_p)
return factor;
 
   case ADDRESS_REG_REG:
-   return factor * la_addr_reg_reg_cost;
+   return factor * reg_reg_cost;
 
   case ADDRESS_CONST_INT:
return lsx_p ? 0 : factor;
@@ -2420,6 +2415,18 @@ loongarch_address_insns (rtx x, machine_mode mode, bool 
might_split_p)
   return 0;
 }
 
+/* Return the number of instructions needed to load or store a value
+   of mode MODE at address X.  Return 0 if X isn't valid for MODE.
+   Assume that multiword moves may need to be split into word moves
+   if MIGHT_SPLIT_P, otherwise assume that a single load or store is
+   enough.  */
+
+int
+loongarch_address_insns (rtx x, mach