[PATCH] x86: Enable *mov_and only for -Oz

2025-05-24 Thread H.J. Lu
commit ef26c151c14a87177d46fd3d725e7f82e040e89f
Author: Roger Sayle 
Date:   Thu Dec 23 12:33:07 2021 +

x86: PR target/103773: Fix wrong-code with -Oz from pop to memory.

transformed "mov $0,mem" to the shorter "and $0,mem" for -Oz.  But

(define_insn "*mov_and"
  [(set (match_operand:SWI248 0 "memory_operand" "=m")
(match_operand:SWI248 1 "const0_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "and{}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "")
   (set_attr "length_immediate" "1")])

isn't guarded for -Oz.  As a result, "and $0,mem" is generated without
-Oz.  Enable *mov_and only for -Oz.

gcc/

PR target/120427
* config/i386/i386.md (*mov_and): Enable only for -Oz.

gcc/testsuite/

PR target/120427
* gcc.target/i386/pr120427.c: New test.

OK for master?

-- 
H.J.
From ff829a2a7e13e1f6b1333f169b2f6adae6a5c192 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Sun, 25 May 2025 07:40:29 +0800
Subject: [PATCH] x86: Enable *mov_and only for -Oz

commit ef26c151c14a87177d46fd3d725e7f82e040e89f
Author: Roger Sayle 
Date:   Thu Dec 23 12:33:07 2021 +

x86: PR target/103773: Fix wrong-code with -Oz from pop to memory.

transformed "mov $0,mem" to the shorter "and $0,mem" for -Oz.  But

(define_insn "*mov_and"
  [(set (match_operand:SWI248 0 "memory_operand" "=m")
(match_operand:SWI248 1 "const0_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "and{}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "")
   (set_attr "length_immediate" "1")])

isn't guarded for -Oz.  As a result, "and $0,mem" is generated without
-Oz.  Enable *mov_and only for -Oz.

gcc/

	PR target/120427
	* config/i386/i386.md (*mov_and): Enable only for -Oz.

gcc/testsuite/

	PR target/120427
	* gcc.target/i386/pr120427.c: New test.

Signed-off-by: H.J. Lu 
---
 gcc/config/i386/i386.md  |  3 ++-
 gcc/testsuite/gcc.target/i386/pr120427.c | 28 
 2 files changed, 30 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120427.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b7a18d583da..6dcdff46642 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2442,7 +2442,8 @@ (define_insn "*mov_and"
   [(set (match_operand:SWI248 0 "memory_operand" "=m")
 	(match_operand:SWI248 1 "const0_operand"))
(clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
+  "reload_completed
+   && optimize_insn_for_size_p () && optimize_size > 1"
   "and{}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
(set_attr "mode" "")
diff --git a/gcc/testsuite/gcc.target/i386/pr120427.c b/gcc/testsuite/gcc.target/i386/pr120427.c
new file mode 100644
index 000..2c2888b189d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120427.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=sapphirerapids" } */
+/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]*\\\$0, \[0-9\]*\\(" } } */
+
+struct __pthread_mutex_s
+{
+  int __lock;
+  unsigned int __count;
+  int __owner;
+  unsigned int __nusers;
+  int __kind;
+  short __spins;
+  short __elision;
+  void *p[2];
+};
+typedef union
+{
+  struct __pthread_mutex_s __data;
+  char __size[40];
+  long int __align;
+} pthread_mutex_t;
+typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t;
+void
+foo (__rtld_lock_recursive_t *lock, int i)
+{
+  lock[i] = (__rtld_lock_recursive_t) {{ { 0, 0, 0, 0, 1,
+  0, 0, { ((void *)0) , ((void *)0) } } }};
+}
-- 
2.49.0



[to-be-committed][RISC-V] shift+and+shift for logical and synthesis

2025-05-24 Thread Jeff Law

The next chunk of Shreya's work.

For this expansion we want to detect cases when the mask fits in a 
simm12 after shifting right by the number of trailing zeros in the mask.


In that case we can synthesize the AND with a shift right, andi and 
shift left.  I saw this case come up when doing some experimentation 
with mvconst_internal removed.


This doesn't make any difference in spec right now, mvconst_internal 
will turn the sequence back into a constant load + and with register. 
But Shreya and I have reviewed the .expand dump on hand written tests 
and verified we're getting the synthesis we want.


Tested on riscv32-elf and riscv64-elf.  Waiting on upstream CI's verdict 
before moving forward.


Jeff

gcc/
* config/riscv/riscv.cc (synthesize_and): Use a srl+andi+sll
sequence when the mask fits in a simm12 after shifting by the
number of trailing zeros.

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 18c8e188f23..24c7acab744 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -14563,6 +14563,36 @@ synthesize_and (rtx operands[3])
   return true;
 }
 
+  /* If we shift right to eliminate the trailing zeros and
+ the result is a SMALL_OPERAND, then it's a shift right,
+ andi and shift left. */
+  t = INTVAL (operands[2]);
+  t >>= ctz_hwi (t);
+  if (budget >= 3 && SMALL_OPERAND (t) && popcount_hwi (t) > 2)
+{
+  /* Shift right to clear the low order bits.  */
+  unsigned HOST_WIDE_INT count = ctz_hwi (INTVAL (operands[2]));
+  rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count));
+  output = gen_reg_rtx (word_mode);
+  emit_insn (gen_rtx_SET (output, x));
+  input = output;
+  
+  /* Now emit the ANDI.  */
+  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
+  mask >>= ctz_hwi (mask);
+  x = gen_rtx_AND (word_mode, input, GEN_INT (mask));
+  output = gen_reg_rtx (word_mode);
+  emit_insn (gen_rtx_SET (output, x));
+  input = output;
+
+  /* Shift left to move bits into position.  */
+  count = INTVAL (operands[2]);
+  count = ctz_hwi (count);
+  x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count));
+  emit_insn (gen_rtx_SET (operands[0], x));
+  return true;
+}
+
   /* If there are all zeros, except for a run of 1s somewhere in the middle
  of the constant, then this is at worst 3 shifts.  */
   t = INTVAL (operands[2]);


[PATCH] x86: Enable *mov_(and|or) only for -Oz

2025-05-24 Thread H.J. Lu
On Sun, May 25, 2025 at 7:47 AM H.J. Lu  wrote:
>
> commit ef26c151c14a87177d46fd3d725e7f82e040e89f
> Author: Roger Sayle 
> Date:   Thu Dec 23 12:33:07 2021 +
>
> x86: PR target/103773: Fix wrong-code with -Oz from pop to memory.
>
> transformed "mov $0,mem" to the shorter "and $0,mem" for -Oz.  But
>
> (define_insn "*mov_and"
>   [(set (match_operand:SWI248 0 "memory_operand" "=m")
> (match_operand:SWI248 1 "const0_operand"))
>(clobber (reg:CC FLAGS_REG))]
>   "reload_completed"
>   "and{}\t{%1, %0|%0, %1}"
>   [(set_attr "type" "alu1")
>(set_attr "mode" "")
>(set_attr "length_immediate" "1")])
>
> isn't guarded for -Oz.  As a result, "and $0,mem" is generated without
> -Oz.  Enable *mov_and only for -Oz.
>
> gcc/
>
> PR target/120427
> * config/i386/i386.md (*mov_and): Enable only for -Oz.
>
> gcc/testsuite/
>
> PR target/120427
> * gcc.target/i386/pr120427.c: New test.
>
> OK for master?
>

"mov $-1,mem" has the same issue.  Here is the updated patch to also
enable "or $-1,mem" only for -Oz.

OK for master?

-- 
H.J.
From 3722d3e03e1816f2814801092893f4ec35775333 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Sun, 25 May 2025 07:40:29 +0800
Subject: [PATCH] x86: Enable *mov_(and|or) only for -Oz

commit ef26c151c14a87177d46fd3d725e7f82e040e89f
Author: Roger Sayle 
Date:   Thu Dec 23 12:33:07 2021 +

x86: PR target/103773: Fix wrong-code with -Oz from pop to memory.

transformed "mov $0,mem" to the shorter "and $0,mem" and "mov $-1,mem"
to the shorter "or $-1,mem" for -Oz.  But

(define_insn "*mov_and"
  [(set (match_operand:SWI248 0 "memory_operand" "=m")
(match_operand:SWI248 1 "const0_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "and{}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "")
   (set_attr "length_immediate" "1")])

and

(define_insn "*mov_or"
  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
(match_operand:SWI248 1 "constm1_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "or{}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "")
   (set_attr "length_immediate" "1")])

aren't guarded for -Oz.  As a result, "and $0,mem" and "or $-1,mem" are
generated without -Oz.  Enable *mov_and and *mov_or only
for -Oz.

gcc/

	PR target/120427
	* config/i386/i386.md (*mov_and): Enable only for -Oz.
	(*mov_or): Likewise.

gcc/testsuite/

	PR target/120427
	* gcc.target/i386/pr120427-1.c: New test.
	* gcc.target/i386/pr120427-2.c: Likewise.

Signed-off-by: H.J. Lu 
---
 gcc/config/i386/i386.md|  6 +++--
 gcc/testsuite/gcc.target/i386/pr120427-1.c | 28 ++
 gcc/testsuite/gcc.target/i386/pr120427-2.c | 28 ++
 3 files changed, 60 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-2.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b7a18d583da..67b05a2a78d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2442,7 +2442,8 @@ (define_insn "*mov_and"
   [(set (match_operand:SWI248 0 "memory_operand" "=m")
 	(match_operand:SWI248 1 "const0_operand"))
(clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
+  "reload_completed
+   && optimize_insn_for_size_p () && optimize_size > 1"
   "and{}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
(set_attr "mode" "")
@@ -2452,7 +2453,8 @@ (define_insn "*mov_or"
   [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
 	(match_operand:SWI248 1 "constm1_operand"))
(clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
+  "reload_completed
+   && optimize_insn_for_size_p () && optimize_size > 1"
   "or{}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
(set_attr "mode" "")
diff --git a/gcc/testsuite/gcc.target/i386/pr120427-1.c b/gcc/testsuite/gcc.target/i386/pr120427-1.c
new file mode 100644
index 000..7f1690e49b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120427-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=sapphirerapids" } */
+/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]+\\\$0, \[0-9\]*\\(" } } */
+
+struct __pthread_mutex_s
+{
+  int __lock;
+  unsigned int __count;
+  int __owner;
+  unsigned int __nusers;
+  int __kind;
+  short __spins;
+  short __elision;
+  void *p[2];
+};
+typedef union
+{
+  struct __pthread_mutex_s __data;
+  char __size[40];
+  long int __align;
+} pthread_mutex_t;
+typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t;
+void
+foo (__rtld_lock_recursive_t *lock, int i)
+{
+  lock[i] = (__rtld_lock_recursive_t) {{ { 0, 0, 0, 0, 1,
+  0, 0, { ((void *)0) , ((void *)0) } } }};
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr120427-2.c b/gcc/testsuite/gcc.target/i386/pr120427-2.c
new file mode 100644
index 000..a380c128ccb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120427-2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-

[to-be-committed][RISC-V] Add andi+bclr synthesis

2025-05-24 Thread Jeff Law
So this patch from Shreya adds the ability to use andi + a series of 
bclr insns to synthesize a logical AND, much like we're doing for 
IOR/XOR using ori+bset or their xor equivalents.


This would regress from a code quality standpoint if we didn't make some 
adjustments to a handful of define_insn_and_split patterns in the riscv 
backend which support the same kind of idioms.


Essentially we turn those define_insn_and_split patterns into the simple 
define_splits they always should have been.  That's been the plan since 
we started down this path -- now is the time to make that change for a 
subset of patterns.  It may be the case that when we're finished we may 
not even need those patterns.  That's still TBD.


I'm aware of one minor regression in xalan.  As seen elsewhere, combine 
reconstructs the mask value, uses mvconst_internal to load it into a reg 
then an and instruction.  That looks better than the operation 
synthesis, but only because of the mvconst_internal little white lie.


This patch does help in a variety of places.  It's fairly common in 
gimple.c from 502.gcc to see cases where we'd use bclr to clear a bit, 
then set the exact same bit a few instructions later.  That was an 
artifact of using a define_insn_and_split -- it wasn't obvious to 
combine that we had two instructions manipulating the same bit.  Now 
that is obvious to combine and the redundant operation gets removed.


This has spun in my tester with no regressions on riscv32-elf and 
riscv64-elf.   Hopefully the baseline for the tester has stepped forward :-)



jeff

gcc/
* config/riscv/bitmanip.md (andi+bclr splits): Simplified from
prior define_insn_and_splits.
* config/riscv/riscv.cc (synthesize_and): Add support for andi+bclr
sequences.  

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 85ace285ff0..21426f49679 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -846,14 +846,12 @@ (define_insn "*bclri"
   [(set_attr "type" "bitmanip")])
 
 ;; In case we have "val & ~IMM" where ~IMM has 2 bits set.
-(define_insn_and_split "*bclri_nottwobits"
-  [(set (match_operand:X 0 "register_operand" "=r")
-   (and:X (match_operand:X 1 "register_operand" "r")
-  (match_operand:X 2 "const_nottwobits_not_arith_operand" "i")))
-   (clobber (match_scratch:X 3 "=&r"))]
+(define_split
+  [(set (match_operand:X 0 "register_operand")
+   (and:X (match_operand:X 1 "register_operand")
+  (match_operand:X 2 "const_nottwobits_not_arith_operand")))
+   (clobber (match_operand:X 3 "register_operand"))]
   "TARGET_ZBS && !paradoxical_subreg_p (operands[1])"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4)))
(set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))]
 {
@@ -862,20 +860,17 @@ (define_insn_and_split "*bclri_nottwobits"
 
   operands[4] = GEN_INT (~bits | topbit);
   operands[5] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+})
 
 ;; In case of a paradoxical subreg, the sign bit and the high bits are
 ;; not allowed to be changed
-(define_insn_and_split "*bclridisi_nottwobits"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-   (and:DI (match_operand:DI 1 "register_operand" "r")
-   (match_operand:DI 2 "const_nottwobits_not_arith_operand" "i")))
-   (clobber (match_scratch:DI 3 "=&r"))]
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+   (and:DI (match_operand:DI 1 "register_operand")
+   (match_operand:DI 2 "const_nottwobits_not_arith_operand")))
+   (clobber (match_operand:DI 3 "register_operand"))]
   "TARGET_64BIT && TARGET_ZBS
&& clz_hwi (~UINTVAL (operands[2])) > 33"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 3) (and:DI (match_dup 1) (match_dup 4)))
(set (match_dup 0) (and:DI (match_dup 3) (match_dup 5)))]
 {
@@ -884,8 +879,7 @@ (define_insn_and_split "*bclridisi_nottwobits"
 
   operands[4] = GEN_INT (~bits | topbit);
   operands[5] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+})
 
 ;; An outer AND with a constant where bits 31..63 are 0 can be seen as
 ;; a virtual zero extension from 31 to 64 bits.
@@ -1061,14 +1055,12 @@ (define_insn_and_split "*i_extrabit"
 [(set_attr "type" "bitmanip")])
 
 ;; Same to use blcri + andi and blcri + bclri
-(define_insn_and_split "*andi_extrabit"
-  [(set (match_operand:X 0 "register_operand" "=r")
-   (and:X (match_operand:X 1 "register_operand" "r")
-  (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits" "i")))
-   (clobber (match_scratch:X 3 "=&r"))]
+(define_split
+  [(set (match_operand:X 0 "register_operand")
+   (and:X (match_operand:X 1 "register_operand")
+  (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits")))
+   (clobber (match_operand:X 3 "register_operand"))]
   "TARGET_ZBS && !not_single_bit_mask_operand (operands[2], VOIDmode)"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 3) (and:

[Ada] Install ACATS 4.2 testsuite in acats-4 directory

2025-05-24 Thread Eric Botcazou
It corresponds to revision 4.2A of http://www.ada-auth.org/acats.html minus 
the same chapters as for ACATS 2.6.  It is not run, the missing step being:

--- a/gcc/ada/gcc-interface/Make-lang.in
+++ b/gcc/ada/gcc-interface/Make-lang.in
@@ -1108,7 +1108,7 @@ check-ada-subtargets: check-acats-subtargets
 # No ada-specific selftests
 selftest-ada:
 
-ACATSDIR = $(TESTSUITEDIR)/ada/acats-2
+ACATSDIR = $(TESTSUITEDIR)/ada/acats-4
 ACATSCMD = run_acats.sh
 
 check_acats_numbers0:=1 2 3 4 5 6 7 8 9


The results on x86-64/Linux are:

=== acats Summary ===
# of expected passes		2578
# of unexpected failures	0
Native configuration is x86_64-suse-linux-gnu


Applied on the mainline.


2025-05-24  Eric Botcazou  

testsuite/
* ada/acats-4: Add ACATS 4.2 testsuite.

-- 
Eric Botcazou




Re: [PATCH 2/2] VR-VALUES: Rewrite test_for_singularity using range_op_handler

2025-05-24 Thread Jeff Law




On 5/23/25 8:56 AM, Andrew MacLeod wrote:

Since the PR scrolled by, I don't think I ever noticed this thread.. or 
at least where it led.


This is not an "equivalence" that ranger would propagate because it's a 
relation with a constant.


the issue here seems to be:

if (x < 4)   when x has a range something like [0,0][10, 40][30, +INF]

on the true edge, we will propagate the RANGE [0,0] for x and any pass 
which queries the range of x will get [0,0],  just as if it was if (x == 0)

likewise, on the false edge we'll propagate x = [10,40][30, +INF]

so although we don't propagate an *equivalence* as such,  any pass which 
queries the range of X will get the [0,0] range one expects.
Which is as good as propagation I think since a query for the range 
returns a singleton, so if we had a subsequent dest = a + x we'd 
simplify that to just dest = a.




As for the transformation...  Perhaps what we are really interested in 
checking here is whether all further uses of x after this statement are 
dominated by one edge of the condition, in which case you do the 
transformation to the check for a singularity. or you could check 
that there are no dominated uses on the OTHER side of the condition 
that might utilize the lost information.. if no one used x on the false 
side.. it doesn't matter.
I just think we want the transformation to go away.  The only thing we 
were trying to do was collapse it down to an EQ/NE so that we had an 
equivalence on one arm that could then be used to optimize code on that 
arm.  Ranger gives us that same capability without the undesirable side 
effects.


Jeff


Andrew






Re: [PATCH v2] libstdc++: Implement C++26 std::indirect [PR119152]

2025-05-24 Thread NightStrike
On Thu, May 22, 2025 at 08:54 Tomasz Kamiński  wrote:

> From: Jonathan Wakely 
>
> This papers implements C++26 std::indirect as specified


“This patch”?

>


[PATCH] fortran: add constant input support for trig functions with half-revolutions

2025-05-24 Thread Yuao Ma
Hi Steve,

Thanks for your review! I've updated the patch.

> this range_check() is unneeded.

Done.

> As a side note, the error message is slightly misleading
> (although it will not be issued).  Technically, x = -1 or 1
> are allowed values, and neither is **between** -1 and 1.

You're right, the original message was a bit imprecise. I've updated this and
five other similar error messages in the patch for better accuracy.

Yuao



0001-fortran-add-constant-input-support-for-trig-function.patch
Description: 0001-fortran-add-constant-input-support-for-trig-function.patch


[PATCH v1] libstdc++: Fix bug in default ctor of extents.

2025-05-24 Thread Luc Grosheintz
The array that stores the dynamic extents used to be default
initialized. The standard requires value initialization. This
commit fixes the bug and adds a test.

libstdc++-v3/ChangeLog:

* include/std/mdspan: Value initialize the array storing the
dynamic extents.
* testsuite/23_containers/mdspan/extents/ctor_default.cc: New
test.

Signed-off-by: Luc Grosheintz 
---
 libstdc++-v3/include/std/mdspan   |  2 +-
 .../mdspan/extents/ctor_default.cc| 41 +++
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index 47cfa405e44..bcf2fa60fea 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -146,7 +146,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   private:
using _S_storage = __array_traits<_IndexType, _S_rank_dynamic>::_Type;
-   [[no_unique_address]] _S_storage _M_dynamic_extents;
+   [[no_unique_address]] _S_storage _M_dynamic_extents{};
   };
 
 template
diff --git 
a/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc
new file mode 100644
index 000..eec300f6896
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc
@@ -0,0 +1,41 @@
+// { dg-do run { target c++23 } }
+#include 
+
+#include 
+#include 
+
+constexpr auto dyn = std::dynamic_extent;
+
+template
+  constexpr void
+  test_default_ctor()
+  {
+Extents exts;
+for(size_t i = 0; i < Extents::rank(); ++i)
+  if(exts.static_extent(i) == std::dynamic_extent)
+   VERIFY(exts.extent(i) == 0);
+  else
+   VERIFY(exts.extent(i) == Extents::static_extent(i));
+  }
+
+constexpr bool
+test_default_ctor_all()
+{
+  test_default_ctor>();
+  test_default_ctor>();
+  test_default_ctor>();
+  test_default_ctor>();
+  test_default_ctor>();
+  test_default_ctor>();
+  test_default_ctor>();
+  test_default_ctor>();
+  return true;
+}
+
+int
+main()
+{
+  test_default_ctor_all();
+  static_assert(test_default_ctor_all());
+  return 0;
+}
-- 
2.49.0



[Ada] Move ACATS 2.6 testsuite to acats-2 directory

2025-05-24 Thread Eric Botcazou
This is in preparation for the installation of the ACATS 4.2 testsuite.

Tested on x86-64/Linux, applied on the mainline.


2025-05-24  Eric Botcazou  

ada/
* gcc-interface/Make-lang.in (ACATSDIR): Use acats-2 directory.
testsuite/
* ada/acats/*: Rename into ada/acats-2/*.
* ada/acats-2/run_all.sh: Adjust to above renaming.
* ada/acats-2/support/impdefc.a: New file.

-- 
Eric Botcazou

commit ba3b42807e70f381e3a4e3e47662a22e7634f3a6
Author: Eric Botcazou 
Date:   Sat May 24 12:04:19 2025 +0200

Ada: move ACATS 2.6 testsuite to acats-2 directory

This is in preparation for the installation of the ACATS 4.2 testsuite.

gcc/ada/
* gcc-interface/Make-lang.in (ACATSDIR): Use acats-2 directory.

gcc/testsuite/
* ada/acats/*: Rename into ada/acats-2/*.
* ada/acats-2/run_all.sh: Adjust to above renaming.
* ada/acats-2/support/impdefc.a: New file.

diff --git a/gcc/ada/gcc-interface/Make-lang.in b/gcc/ada/gcc-interface/Make-lang.in
index 964cae83c55..2158bb68cce 100644
--- a/gcc/ada/gcc-interface/Make-lang.in
+++ b/gcc/ada/gcc-interface/Make-lang.in
@@ -1108,7 +1108,7 @@ check-ada-subtargets: check-acats-subtargets check-gnat-subtargets
 # No ada-specific selftests
 selftest-ada:
 
-ACATSDIR = $(TESTSUITEDIR)/ada/acats
+ACATSDIR = $(TESTSUITEDIR)/ada/acats-2
 ACATSCMD = run_acats.sh
 
 check_acats_numbers0:=1 2 3 4 5 6 7 8 9
diff --git a/gcc/testsuite/ada/acats/run_all.sh b/gcc/testsuite/ada/acats-2/run_all.sh
similarity index 97%
rename from gcc/testsuite/ada/acats/run_all.sh
rename to gcc/testsuite/ada/acats-2/run_all.sh
index 2f737854c60..a57e3831abb 100755
--- a/gcc/testsuite/ada/acats/run_all.sh
+++ b/gcc/testsuite/ada/acats-2/run_all.sh
@@ -103,14 +103,14 @@ gnatls -v >> $dir/acats.log
 display ""
 
 if [ -n "$GCC_RUNTEST_PARALLELIZE_DIR" ]; then
-  dir_support=$dir/../acats/support
+  dir_support=$dir/../acats-2/support
 
   rm -rf $dir/run
   mv $dir/tests $dir/tests.$$ 2> /dev/null
   rm -rf $dir/tests.$$ &
   mkdir -p $dir/run
 
-  cp -pr $dir/../acats/tests $dir/
+  cp -pr $dir/../acats-2/tests $dir/
 else
   dir_support=$dir/support
 
@@ -336,7 +336,9 @@ for chapter in $chapters; do
   echo "BUILD $main" >> $dir/acats.log
   EXTERNAL_OBJECTS=""
   case $i in
-cxb30*) EXTERNAL_OBJECTS="$dir_support/cxb30040.o $dir_support/cxb30060.o $dir_support/cxb30130.o $dir_support/cxb30131.o";;
+cxb3004) EXTERNAL_OBJECTS="$dir_support/cxb30040.o";;
+cxb3006) EXTERNAL_OBJECTS="$dir_support/cxb30060.o";;
+cxb3013) EXTERNAL_OBJECTS="$dir_support/cxb30130.o $dir_support/cxb30131.o";;
 ca1020e) rm -f ca1020e_func1.adb ca1020e_func2.adb ca1020e_proc1.adb ca1020e_proc2.adb > /dev/null 2>&1;;
 ca14028) rm -f ca14028_func2.ads ca14028_func3.ads ca14028_proc1.ads ca14028_proc3.ads > /dev/null 2>&1;;
   esac
diff --git a/gcc/testsuite/ada/acats-2/support/impdefc.a b/gcc/testsuite/ada/acats-2/support/impdefc.a
new file mode 100644
index 000..6a27a06714d
--- /dev/null
+++ b/gcc/testsuite/ada/acats-2/support/impdefc.a
@@ -0,0 +1,140 @@
+-- IMPDEFC.A
+--
+-- Grant of Unlimited Rights
+--
+-- Under contracts F33600-87-D-0337, F33600-84-D-0280, MDA903-79-C-0687,
+-- F08630-91-C-0015, and DCA100-97-D-0025, the U.S. Government obtained 
+-- unlimited rights in the software and documentation contained herein.
+-- Unlimited rights are defined in DFAR 252.227-7013(a)(19).  By making 
+-- this public release, the Government intends to confer upon all 
+-- recipients unlimited rights  equal to those held by the Government.  
+-- These rights include rights to use, duplicate, release or disclose the 
+-- released technical data and computer software in whole or in part, in 
+-- any manner and for any purpose whatsoever, and to have or permit others 
+-- to do so.
+--
+--DISCLAIMER
+--
+-- ALL MATERIALS OR INFORMATION HEREIN RELEASED, MADE AVAILABLE OR
+-- DISCLOSED ARE AS IS.  THE GOVERNMENT MAKES NO EXPRESS OR IMPLIED 
+-- WARRANTY AS TO ANY MATTER WHATSOEVER, INCLUDING THE CONDITIONS OF THE
+-- SOFTWARE, DOCUMENTATION OR OTHER INFORMATION RELEASED, MADE AVAILABLE 
+-- OR DISCLOSED, OR THE OWNERSHIP, MERCHANTABILITY, OR FITNESS FOR A
+-- PARTICULAR PURPOSE OF SAID MATERIAL.
+--*
+--
+-- DESCRIPTION:
+-- This package provides tailorable entities for a particular
+-- implementation.  Each entity may be modified to suit the needs
+-- of the implementation.  Default values are provided to act as
+-- a guide.
+--
+-- The entities in this package are those which are used exclusively
+-- in tests for Annex C (Systems Programming).
+-- 
+-- APPLICABILITY CRITERIA:
+-- This package is only required for implementations validating the
+-- Systems Programming Annex.
+-- 
+-- CHANGE HISTORY:
+-- 29 Jan 96   SAIC

[PATCH] rs6000: Remove include of reload.h

2025-05-24 Thread Segher Boessenkool
As one of the last steps in removing old reload, I'll delete the reload.h
header file.  It would be a bit embarrassing if that stopped the target I am
responsible for from working, so let's prevent that.

We do not actually use anything from this header file (checked by building
with this patch, and make check has identical results as well), so it was
easy for our port.  Many other ports will be like this, but some will need
some adjustments.  I'll do cross builds of many ports before it is all over,
but it would be good if other ports tried to remove reload.h from their
includes as well :-)

Committing to trunk.


Segher
---
 gcc/config/rs6000/rs6000.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 12dbde2bc630..9b01d3e29167 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -55,7 +55,6 @@
 #include "output.h"
 #include "common/common-target.h"
 #include "langhooks.h"
-#include "reload.h"
 #include "sched-int.h"
 #include "gimplify.h"
 #include "gimple-iterator.h"
-- 
1.8.3.1



[Ada] Install ACATS 3.1 testsuite in acats-3 directory

2025-05-24 Thread Eric Botcazou
It corresponds to revision 3.1DD of http://www.ada-auth.org/acats.html minus 
the same chapters as for ACATS 2.6.  It is not run, the missing step being:

--- a/gcc/ada/gcc-interface/Make-lang.in
+++ b/gcc/ada/gcc-interface/Make-lang.in
@@ -1108,7 +1108,7 @@ check-ada-subtargets: check-acats-subtargets check-gnat-
subtargets
 # No ada-specific selftests
 selftest-ada:
 
-ACATSDIR = $(TESTSUITEDIR)/ada/acats-2
+ACATSDIR = $(TESTSUITEDIR)/ada/acats-3
 ACATSCMD = run_acats.sh
 
 check_acats_numbers0:=1 2 3 4 5 6 7 8 9


The results on x86-64/Linux are:

=== acats Summary ===
# of expected passes		2421
# of unexpected failures	0

Applied on the mainline.


2025-05-24  Eric Botcazou  

testsuite/
* ada/acats-3: Add ACATS 3.1 testsuite.

-- 
Eric Botcazou




Re: [PATCH v2 2/2] emit-rtl: Validate mode for paradoxical hardware subregs [PR119966]

2025-05-24 Thread Dimitar Dimitrov
On Fri, May 16, 2025 at 06:14:30PM +0100, Richard Sandiford wrote:
> Dimitar Dimitrov  writes:
> > After r16-160-ge6f89d78c1a752, late_combine2 started transforming the
> > following RTL for pru-unknown-elf:
> >
> >   (insn 3949 3948 3951 255 (set (reg:QI 56 r14.b0 [orig:1856 _619 ] [1856])
> >   (and:QI (reg:QI 1 r0.b1 [orig:1855 _201 ] [1855])
> >   (const_int 3 [0x3])))
> >(nil))
> >   ...
> >   (insn 3961 7067 3962 255 (set (reg:SI 56 r14.b0)
> >   (zero_extend:SI (reg:QI 56 r14.b0 [orig:1856 _619 ] [1856])))
> >(nil))
> >
> > into:
> >
> >   (insn 3961 7067 3962 255 (set (reg:SI 56 r14.b0)
> >   (and:SI (subreg:SI (reg:QI 1 r0.b1 [orig:1855 _201 ] [1855]) 0)
> >   (const_int 3 [0x3])))
> >(nil))
> >
> > That caused libbacktrace build to break for pru-unknown-elf.  Register
> > r0.b1 (regno 1) is not valid for SImode, which validate_subreg failed to
> > reject.
> >
> > Fix by calling HARD_REGNO_MODE_OK to ensure that both inner and outer
> > modes are valid for the hardware subreg.
> >
> > This patch fixes the broken PRU toolchain build.  It leaves only two
> > test case regressions for PRU, caused by rnreg pass renaming a valid
> > paradoxical subreg into an invalid one.
> >   gcc.c-torture/execute/20040709-1.c
> >   gcc.c-torture/execute/20040709-2.c
> >
> > PR target/119966
> >
> > gcc/ChangeLog:
> >
> > * emit-rtl.cc (validate_subreg): Validate inner
> > and outer mode for paradoxical hardware subregs.
> >
> > Co-authored-by: Andrew Pinski 
> > Signed-off-by: Dimitar Dimitrov 
> > ---
> >  gcc/emit-rtl.cc | 3 +++
> >  1 file changed, 3 insertions(+)
> >
> > diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
> > index e46b0f9eac4..6c5d9b55508 100644
> > --- a/gcc/emit-rtl.cc
> > +++ b/gcc/emit-rtl.cc
> > @@ -983,6 +983,9 @@ validate_subreg (machine_mode omode, machine_mode imode,
> >if ((COMPLEX_MODE_P (imode) || VECTOR_MODE_P (imode))
> >   && GET_MODE_INNER (imode) == omode)
> > ;
> > +  else if (!targetm.hard_regno_mode_ok (regno, imode)
> > +  || !targetm.hard_regno_mode_ok (regno, omode))
> > +   return false;
> 
> It isn't meaningful to test regno against omode, since that isn't
> necessarily the register that would be produced by the subreg.

Do you refer to the register renaming pass?  I can't think of another reason
for the regno of a hardware register in a subreg to be changed.

> 
> ISTR that this is a sensitive part of the codebase.  I think there
> are/were targets that create unfoldable subregs for argument passing
> and return.  And I think e500 had unfoldable subregs of FP registers,
> although that port is gone now.

Could you share what is "unfoldable subreg"?  I could not find this phrase
anywhere in the source, except in one comment in the i386 port.

Perhaps a subreg of a hardware register is "unfoldable" when the hardware
register is not valid in the outer mode?  In which case the subreg cannot be
replaced directly with a hardware register?

> 
> So I suppose the question is: when given a hard register, should
> validate_subreg test whether the subreg can be folded to a hard
> register?  Or is it more relaxed than that?  Do we need different
> rules before LRA (which could fix up subregs through reloading)
> and after LRA (where unfoldable subregs stay unfoldable).

My naive answer _was_ that validate_subreg should always perform checks for
hardware registers.  Now I see it was too naive, because I was not aware of
the different ways targets use subregs.  Hence this patch should be dropped.

Meanwhile PR119966, which this patch hoped to address, got fixed instead
with r16-809-gf725d6765373f7.

> 
> If validate_subreg should test whether a subreg of a hard register
> can be folded to a hard register, the fix would be to use
> simplify_subreg_regno instead of the current tests.  But it looks
> like that was deliberately not done.

When validate_subreg was introduced with r0-63800-gbeb72684810c1a,
simplify_subreg_regno simply did not exist.  The simplify_subreg_regno
itself was added later with r0-89444-geef302d277ea42.

> 
> It might still be worth trying to use simplify_subreg_regno and
> seeing what breaks.  Any fallout would at least let us expand
> the comments to explain the constraints.

I tried simplify_subreg_regno, and some tests regressed for x86_64-pc-linux-gnu:

  check-gcc-c                   trunk   patched  change
  ------------------------------+-------+--------+-------
  # of expected passes          216431  216358   -73
  # of unexpected failures      117     197      +80
  # of unexpected successes     25      25       0
  # of expected failures        1552    1552     0
  # of unresolved testcases     0       33       +33
  # of unsupported tests        3888    3888     0

For reference, here are a few of the newly failing

Re: [PATCH 1/3] LoongArch: testsuite: Fix pr112325.c and pr117888-1.c.

2025-05-24 Thread Lulu Cheng



在 2025/5/23 下午7:07, Xi Ruoyao 写道:

On Fri, 2025-03-07 at 14:14 +0800, Lulu Cheng wrote:

By default, vectorization is not enabled on LoongArch,
resulting in the failure of these two test cases.

Hmm, but wouldn't every test in the vect/ directory automatically get
-mlsx (from DEFAULT_VECTCFLAGS in lib/target-supports.exp)?


This is indeed the case. The reason for the error is that the default
options are overwritten by using 'dg-options' in these two test cases.




gcc/testsuite/ChangeLog:

* gcc.dg/vect/pr112325.c: Add the vector compilation
option '-mlsx' for LoongArch.
* gcc.dg/vect/pr117888-1.c: Likewise.

---
  gcc/testsuite/gcc.dg/vect/pr112325.c   | 1 +
  gcc/testsuite/gcc.dg/vect/pr117888-1.c | 1 +
  2 files changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/pr112325.c
b/gcc/testsuite/gcc.dg/vect/pr112325.c
index 143903beab2..8689fbfe092 100644
--- a/gcc/testsuite/gcc.dg/vect/pr112325.c
+++ b/gcc/testsuite/gcc.dg/vect/pr112325.c
@@ -4,6 +4,7 @@
  /* { dg-require-effective-target vect_shift } */
  /* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } }
*/
  /* { dg-additional-options "--param max-completely-peeled-insns=200"
{ target powerpc64*-*-* } } */
+/* { dg-additional-options "-mlsx" { target loongarch64-*-* } } */
  
  typedef unsigned short ggml_fp16_t;

  static float table_f32_f16[1 << 16];
diff --git a/gcc/testsuite/gcc.dg/vect/pr117888-1.c
b/gcc/testsuite/gcc.dg/vect/pr117888-1.c
index 4796a7c83c1..0b31fcdc423 100644
--- a/gcc/testsuite/gcc.dg/vect/pr117888-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr117888-1.c
@@ -4,6 +4,7 @@
  /* { dg-require-effective-target vect_shift } */
  /* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } }
*/
  /* { dg-additional-options "--param max-completely-peeled-insns=200"
{ target powerpc64*-*-* } } */
+/* { dg-additional-options "-mlsx" { target loongarch64-*-* } } */
  
  typedef unsigned short ggml_fp16_t;

  static float table_f32_f16[1 << 16];




[PATCH] [lra] force reg update after spilling to memory [PR120424]

2025-05-24 Thread Alexandre Oliva


In the added C++ testcase, a stack slot at a negative sp offset is
used to hold a value across a call.

There are a couple of causes that directly lead to this outcome:

- the -fstack-clash-protection and -fnon-call-exceptions options, that
cause arm_frame_pointer_required to flip from false to true when the
first pseudo gets spilled to memory;

- when the affected pseudo is spilled to memory, we fail to update lra
regno info, because the insns that reference it are already on the
lra_constraint_insn_stack.

There is another potentially-related issue:

- when we notice that the frame pointer can no longer be eliminated to
the stack pointer, we immediately clear can_eliminate, and also
prev_can_eliminate, but update_reg_eliminate relied on the latter to
tell that it needs to propagate a previous_offset to the
newly-selected elimination, or restore the original offsets.

This patch ensures that we update insn register info after spilling a
pseudo to memory, and enables update_reg_eliminate to recognize the
case in which a previously-preferred elimination is disabled
regardless of prev_can_eliminate.

Regstrapped on x86_64-linux-gnu, also tested with gcc-14 on arm-vx7r2,
and manually tested with trunk targeting arm-eabi and arm-linux-gnu.
Ok to install?


for  gcc/ChangeLog

PR rtl-optimization/120424
PR middle-end/118939?
* lra-spills.cc (spill_pseudos): Update insn regno info.
* lra-eliminations.cc (update_reg_eliminate): Recognize
disabling of active elimination regardless of
prev_can_eliminate.

for  gcc/testsuite/ChangeLog

PR rtl-optimization/120424
PR middle-end/118939?
* g++.target/arm/pr120424.C: New.
* gnat.dg/controlled9.adb: New.
* gnat.dg/controlled9_pkg.ads: New.
---
 gcc/lra-eliminations.cc   |2 +-
 gcc/lra-spills.cc |2 +-
 gcc/testsuite/g++.target/arm/pr120424.C   |   34 +
 gcc/testsuite/gnat.dg/controlled9.adb |   10 +
 gcc/testsuite/gnat.dg/controlled9_pkg.ads |5 
 5 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/arm/pr120424.C
 create mode 100644 gcc/testsuite/gnat.dg/controlled9.adb
 create mode 100644 gcc/testsuite/gnat.dg/controlled9_pkg.ads

diff --git a/gcc/lra-eliminations.cc b/gcc/lra-eliminations.cc
index d84a7d1ee99f0..bb708b007a4ee 100644
--- a/gcc/lra-eliminations.cc
+++ b/gcc/lra-eliminations.cc
@@ -1185,7 +1185,7 @@ update_reg_eliminate (bitmap insns_with_changed_offsets)
  setup_can_eliminate (ep, false);
  continue;
}
-  if (ep->can_eliminate != prev && elimination_map[ep->from] == ep)
+  if (!ep->can_eliminate && elimination_map[ep->from] == ep)
{
  /* We cannot use this elimination anymore -- find another
 one.  */
diff --git a/gcc/lra-spills.cc b/gcc/lra-spills.cc
index fc912c43ce6e1..4febc693d2838 100644
--- a/gcc/lra-spills.cc
+++ b/gcc/lra-spills.cc
@@ -556,7 +556,7 @@ spill_pseudos (void)
fprintf (lra_dump_file,
 "Changing spilled pseudos to memory in insn #%u\n",
 INSN_UID (insn));
- lra_push_insn (insn);
+ lra_push_insn_and_update_insn_regno_info (insn);
  if (lra_reg_spill_p || targetm.different_addr_displacement_p ())
lra_set_used_insn_alternative (insn, LRA_UNKNOWN_ALT);
}
diff --git a/gcc/testsuite/g++.target/arm/pr120424.C 
b/gcc/testsuite/g++.target/arm/pr120424.C
new file mode 100644
index 0..4d0e49013c04a
--- /dev/null
+++ b/gcc/testsuite/g++.target/arm/pr120424.C
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv7 -O2 -fstack-clash-protection 
-fnon-call-exceptions" } */
+/* { dg-final { scan-assembler-not {#-8} } } */
+/* LRA register elimination gets confused when register spilling
+   causes arm_frame_pointer_required to switch from false to true, and
+   ends up using a stack slot below sp.  */
+
+void f() {
+  int i = 0, j = 0;
+  asm ("" : : "m" (i), "m" (j));
+}
+
+void g(void (*fn[])(), int i)
+{
+  auto fn0 = fn[i+0];
+  auto fn1 = fn[i+1];
+  auto fn2 = fn[i+2];
+  auto fn3 = fn[i+3];
+  fn0();
+  fn1();
+  if (!fn2)
+throw i+2;
+  fn2();
+  fn3();
+  fn0();
+  fn1();
+}
+
+int
+main()
+{
+  void (*fn[4])() = { f, f, f, f };
+  g (fn, 0);
+}
diff --git a/gcc/testsuite/gnat.dg/controlled9.adb 
b/gcc/testsuite/gnat.dg/controlled9.adb
new file mode 100644
index 0..fb7acce854e12
--- /dev/null
+++ b/gcc/testsuite/gnat.dg/controlled9.adb
@@ -0,0 +1,10 @@
+--  { dg-do run }
+--  { dg-options "-O1 -fstack-check" }
+--  from PR middle-end/118939
+
+with Controlled9_Pkg;
+procedure Controlled9 is
+   S : constant Controlled9_Pkg.T_Access := new Controlled9_Pkg.T;
+begin
+   null;
+end Controlled9;   
diff --git a/gcc/testsuite/gnat.dg/controlled9_pkg.ads 
b/gcc/testsuite/gnat.dg/controlled9_pkg.