[PATCH] expand: Fix up ICE on VCE from _Complex types to _BitInt [PR117458]

2024-11-19 Thread Jakub Jelinek
Hi!

extract_bit_field can't handle extraction of non-mode precision
from complex mode operands which don't live in memory, e.g. gen_lowpart
crashes on those.
The following patch in that case defers the extract_bit_field call
until op0 is forced into memory.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-11-19  Jakub Jelinek  

PR middle-end/117458
* expr.cc (expand_expr_real_1) <case VIEW_CONVERT_EXPR>: Don't
call extract_bit_field if op0 has complex mode and isn't a MEM,
instead first force op0 into memory and then call extract_bit_field.

* gcc.dg/bitint-116.c: New test.

--- gcc/expr.cc.jj  2024-11-08 12:38:10.684495739 +0100
+++ gcc/expr.cc 2024-11-18 18:35:47.259349847 +0100
@@ -12505,7 +12505,9 @@ expand_expr_real_1 (tree exp, rtx target
op0 = convert_modes (mode, GET_MODE (op0), op0,
 TYPE_UNSIGNED (TREE_TYPE (treeop0)));
   /* If the output type is a bit-field type, do an extraction.  */
-  else if (reduce_bit_field && mode != BLKmode)
+  else if (reduce_bit_field
+  && mode != BLKmode
+  && (MEM_P (op0) || !COMPLEX_MODE_P (GET_MODE (op0
return extract_bit_field (op0, TYPE_PRECISION (type), 0,
  TYPE_UNSIGNED (type), NULL_RTX,
  mode, mode, false, NULL);
@@ -12529,6 +12531,11 @@ expand_expr_real_1 (tree exp, rtx target
 
  emit_move_insn (target, op0);
  op0 = target;
+
+ if (reduce_bit_field && mode != BLKmode)
+   return extract_bit_field (op0, TYPE_PRECISION (type), 0,
+ TYPE_UNSIGNED (type), NULL_RTX,
+ mode, mode, false, NULL);
}
 
   /* If OP0 is (now) a MEM, we need to deal with alignment issues.  If the
--- gcc/testsuite/gcc.dg/bitint-116.c.jj2024-11-18 18:50:05.322348174 
+0100
+++ gcc/testsuite/gcc.dg/bitint-116.c   2024-11-18 18:50:57.785614318 +0100
@@ -0,0 +1,11 @@
+/* PR middle-end/117458 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-std=c23 -O2" } */
+
+typedef _BitInt(33) B __attribute__((may_alias));
+
+_BitInt(33)
+foo (_Complex float x)
+{
+  return *(B *)&x;
+}

Jakub



[PATCH v1 2/2] RISC-V: Refine the rtl expand check for strided ld/st

2024-11-19 Thread pan2 . li
From: Pan Li 

This patch would like to remove the unnecessary option for the
strided load/store testcases.  After fixing the option in rvv.exp,
both -O2 and -O3 will be passed to the test files for the rtl expand
dump check, but -O2 has the IFN 2 times while -O3 has it 4 times with
-fvectorize specified.

Thus, add xfail O2 for IFN 4 times check, as well as xfail O3 for 2
times check.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f16.c: Remove
unnecessary optimization option and xfail O2/O3 diff IFN times
from the rtl expand dump.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f32.c: Ditto.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f64.c: Ditto.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i16.c: Ditto.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i32.c: Ditto.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i64.c: Ditto.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-i8.c: Ditto.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u16.c: Ditto.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u32.c: Ditto.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u64.c: Ditto.
* gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-u8.c: Ditto.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-f16.c   | 8 +---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-f32.c   | 8 +---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-f64.c   | 8 +---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-i16.c   | 8 +---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-i32.c   | 8 +---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-i64.c   | 8 +---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-i8.c| 8 +---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-u16.c   | 8 +---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-u32.c   | 8 +---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-u64.c   | 8 +---
 .../riscv/rvv/autovec/strided/strided_ld_st-1-u8.c| 8 +---
 11 files changed, 55 insertions(+), 33 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f16.c
index 41fe2b20a98..c409c5724b1 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f16.c
@@ -1,11 +1,13 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -fno-vect-cost-model 
-fdump-rtl-expand-details" } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -fno-vect-cost-model 
-fdump-rtl-expand-details" } */
 
 #include "strided_ld_st.h"
 
 DEF_STRIDED_LD_ST_FORM_1(_Float16)
 
-/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_LOAD " 4 "expand" } } */
-/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_STORE " 4 "expand" } } 
*/
+/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_LOAD " 4 "expand" { 
xfail { any-opts "-O2" } } } } */
+/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_STORE " 4 "expand" { 
xfail { any-opts "-O2" } } } } */
+/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_LOAD " 2 "expand" { 
xfail { any-opts "-O3" } } } } */
+/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_STORE " 2 "expand" { 
xfail { any-opts "-O3" } } } } */
 /* { dg-final { scan-assembler-times {vlse16.v} 1 } } */
 /* { dg-final { scan-assembler-times {vsse16.v} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f32.c
index 650b5fce4e8..01e27d62276 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/strided/strided_ld_st-1-f32.c
@@ -1,11 +1,13 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model 
-fdump-rtl-expand-details" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -fno-vect-cost-model 
-fdump-rtl-expand-details" } */
 
 #include "strided_ld_st.h"
 
 DEF_STRIDED_LD_ST_FORM_1(float)
 
-/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_LOAD " 4 "expand" } } */
-/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_STORE " 4 "expand" } } 
*/
+/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_LOAD " 4 "expand" { 
xfail { any-opts "-O2" } } } } */
+/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_STORE " 4 "expand" { 
xfail { any-opts "-O2" } } } } */
+/* { dg-final { scan-rtl-dump-times ".MASK_LEN_STRIDED_LOAD " 2 "expand" { 
xfail { any-opts "-O3"

Re: [PATCH] Add -f{,no-}assume-sane-operators-new-delete{,={0,1,2}} options [PR110137]

2024-11-19 Thread Richard Biener
On Mon, 18 Nov 2024, Eric Gallager wrote:

> On Fri, Nov 15, 2024 at 11:08 AM Jakub Jelinek  wrote:
> >
> > Hi!
> >
> > The following patch adds a new tristate option for optimizations related to
> > replaceable global operators new/delete.
> > The option isn't called -fassume-sane-operator-new (which clang++
> > implements), because
> > 1) clang++ option means something different; initially it was an
> >option to add malloc attribute to those declarations (but we have
> >malloc attribute on all  calls already unconditionally);
> >later it was changed to add noalias attribute rather than malloc,
> >whatever it means, but it is certainly about the return value
> >from the operator new (whether it can alias with other pointers);
> >we already assume malloc-ish behavior that it doesn't alias any
> >other pointers
> > 2) the option only affects operator new, we want it to affect also
> >operator delete
> > In the past, before PR101480 fix in 2021, we treated the replaceable
> > global operators new/delete when called from new/delete expressions
> > similarly to malloc/free, i.e. we assumed beyond their described
> > behavior they act like const functions, don't read or modify global
> > state (observable in current TU).  That is fine when we treat
> > malloc/free as a blackbox and hope people don't use malloc hooks or
> > don't do something weird in them, or when the replaceable global
> > operators use new/delete under the hood and don't do anything else
> > (except perhaps throwing exceptions) beyond that.
> > clang++ behavior (again only for replaceable global operators new/delete
> > when called from new/delete expressions (or __builtin_operator_{new,delete})
> > is apparently that they act like pure functions beyond what they are
> > documented to do, i.e. can read global state (aka g++.dg/torture/pr101480.C
> > testcase is considered valid), but can't modify it.
> > I think that is because of the license from
> > https://eel.is/c++draft/expr.new#14
> > https://eel.is/c++draft/expr.delete#6
> > that those calls could be omitted, I think valid C++ programs just can't
> > rely on some observable global state modification done by those operators
> > happening.
> >
> > The patch below makes the clang++ behavior the default (i.e. pure-ish
> > operator new/delete when called from new/delete expressions, no assumptions
> > when called directly) and adds options to override this behavior in both
> > directions, to the pre-PR101480 state with e.g.
> > -fassume-sane-operators-new-delete (in fact a little bit further; because
> > the patch makes those assumptions even when calling the operators directly),
> > and to the current trunk state with e.g.
> > -fno-assume-sane-operators-new-delete.
> >
> > I've tried to explain stuff in the documentation too.
> 
> This all seems excessively complicated; can't it be simplified a bit?

I'd like to second this - I don't see value in adding the "intermediate"
state which we never had.  We for quite some time had two extremes, if
we want to have a separate flag controlling this besides -fallocation-dce
then go with dual-state please.  I'd even say we don't need any new
flag but can use the -fallocation-dce umbrella to control how the
middle-end interprets new/delete semantically since the ability to
elide new/delete implies that the user at least doesn't care about
side-effects.  Documenting the behavior clearly is of course good.

Note I can live with an extra flag, but I can't see good reasons for
the intermediate state.

Richard.

> >
> > So far smoke tested, ok for trunk if it passes full bootstrap/regtest?
> >
> > 2024-11-15  Jakub Jelinek  
> >
> > PR c++/110137
> > gcc/
> > * doc/invoke.texi (-fassume-sane-operators-new-delete,
> > -fno-assume-sane-operators-new-delete,
> > -fassume-sane-operators-new-delete=): Document.
> > * gimple.cc (gimple_call_fnspec): Handle
> > -f{,no-}assume-sane-operators-new-delete{,={0,1,2}}.
> > gcc/c-family/
> > * c.opt (fassume-sane-operators-new-delete,
> > fassume-sane-operators-new-delete=): New options.
> > gcc/testsuite/
> > * g++.dg/tree-ssa/pr110137-1.C: New test.
> > * g++.dg/tree-ssa/pr110137-2.C: New test.
> > * g++.dg/tree-ssa/pr110137-3.C: New test.
> > * g++.dg/tree-ssa/pr110137-4.C: New test.
> > * g++.dg/tree-ssa/pr110137-5.C: New test.
> > * g++.dg/tree-ssa/pr110137-6.C: New test.
> >
> > --- gcc/c-family/c.opt.jj   2024-11-15 13:13:29.021751829 +0100
> > +++ gcc/c-family/c.opt  2024-11-15 15:35:02.401561741 +0100
> > @@ -1686,6 +1686,14 @@ fasm
> >  C ObjC C++ ObjC++ Var(flag_no_asm, 0)
> >  Recognize the \"asm\" keyword.
> >
> > +fassume-sane-operators-new-delete
> > +C++ ObjC++ LTO Alias(fassume-sane-operators-new-delete=,2,0)
> > +Assume C++ replaceable global operators new, new[], delete, delete[] don't 
> > read or write visible global state.
> > +
> > +fassume-sane-operators-

[PATCH 11/15] Alpha: Fix a block move pessimisation with zero-extension after LDWU

2024-11-19 Thread Maciej W. Rozycki
For the BWX case we have a pessimisation in `alpha_expand_block_move' 
for HImode loads where we place the data loaded into a HImode register 
as well, therefore losing information that indeed the data loaded has 
already been zero-extended to the full DImode width of the register.  
Later on when we store this data in QImode quantities into an unaligned 
destination, we zero-extend it again for the purpose of right-shifting,
such as with the test case included producing code at `-O2' as follows:

ldah $2,unaligned_src_hi($29)   !gprelhigh
lda $1,unaligned_src_hi($2) !gprellow
ldwu $6,unaligned_src_hi($2)!gprellow
ldwu $5,2($1)
ldwu $4,4($1)
bis $31,$31,$31
zapnot $6,3,$3  # Redundant!
ldbu $7,6($1)
zapnot $5,3,$2  # Redundant!
stb $6,0($16)
zapnot $4,3,$1  # Redundant!
stb $5,2($16)
srl $3,8,$3
stb $4,4($16)
srl $2,8,$2
stb $3,1($16)
srl $1,8,$1
stb $2,3($16)
stb $1,5($16)
stb $7,6($16)

The non-BWX case is unaffected, because there we use byte insertion, so 
we don't care that data is held in a HImode register.

Address this by making the holding RTX a HImode subreg of the original 
DImode register, which the RTL passes can then see through and eliminate 
the zero-extension where otherwise required, resulting in this shortened 
code:

ldah $2,unaligned_src_hi($29)   !gprelhigh
lda $1,unaligned_src_hi($2) !gprellow
ldwu $4,unaligned_src_hi($2)!gprellow
ldwu $3,2($1)
ldwu $2,4($1)
bis $31,$31,$31
srl $4,8,$6
ldbu $1,6($1)
srl $3,8,$5
stb $4,0($16)
stb $6,1($16)
srl $2,8,$4
stb $3,2($16)
stb $5,3($16)
stb $2,4($16)
stb $4,5($16)
stb $1,6($16)

While at it reformat the enclosing do-while statement according to the 
GNU Coding Standards, observing that in this case it does not obfuscate 
the change, owing to the odd original indentation.

gcc/
* config/alpha/alpha.cc (alpha_expand_block_move): Use a HImode 
subreg of a DImode register to hold data from an aligned HImode 
load.
---
 gcc/config/alpha/alpha.cc|   17 +--
 gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c |   16 ++
 2 files changed, 27 insertions(+), 6 deletions(-)

gcc-alpha-unaligned-store-bwx-hi.diff
Index: gcc/gcc/config/alpha/alpha.cc
===
--- gcc.orig/gcc/config/alpha/alpha.cc
+++ gcc/gcc/config/alpha/alpha.cc
@@ -3998,14 +3998,19 @@ alpha_expand_block_move (rtx operands[])
   if (bytes >= 2)
 {
   if (src_align >= 16)
-   {
- do {
-   data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
-   emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
+   do
+ {
+   tmp = gen_reg_rtx (DImode);
+   emit_move_insn (tmp,
+   expand_simple_unop (DImode, SET,
+   adjust_address (orig_src,
+   HImode, ofs),
+   NULL_RTX, 1));
+   data_regs[nregs++] = gen_rtx_SUBREG (HImode, tmp, 0);
bytes -= 2;
ofs += 2;
- } while (bytes >= 2);
-   }
+ }
+   while (bytes >= 2);
   else if (! TARGET_BWX)
{
  data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
Index: gcc/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c
===
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mbwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned short unaligned_src_hi[4];
+
+void
+memcpy_unaligned_dst_hi (void *dst)
+{
+  __builtin_memcpy (dst, unaligned_src_hi, 7);
+}
+
+/* { dg-final { scan-assembler-times "\\sldwu\\s" 3 } } */
+/* { dg-final { scan-assembler-times "\\sldbu\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstb\\s" 7 } } */
+/* { dg-final { scan-assembler-not "\\szapnot\\s" } } */


[PATCH 05/15] Alpha: Permit constant zero source for "insvmisaligndi"

2024-11-19 Thread Maciej W. Rozycki
Eliminate a redundant bitwise inclusive OR operation on the insertion of 
constant zero into a bit-field, improving code produced at `-O2' from an 
output sequence such as:

mov $31,$3  # Redundant!
ldq_u $1,7($16)
insqh $3,$16,$3 # Redundant!
ldq_u $2,0($16)
mskqh $1,$16,$1
mskql $2,$16,$2
bis $1,$3,$1# Redundant!
stq_u $1,7($16)
stq_u $2,0($16)
ret $31,($26),1

to:

ldq_u $2,7($16)
ldq_u $1,0($16)
mskqh $2,$16,$2
stq_u $2,7($16)
mskql $1,$16,$1
stq_u $1,0($16)
ret $31,($26),1

for a quadword unaligned store operation.  As shown in the example this 
only triggers for the high-part store (and therefore only for 2-byte, 
4-byte, and 8-byte stores), because `insXl' insns are fully expressed in 
terms of RTL and therefore the insertion of zero is eliminated in later 
RTL passes, however corresponding `insXh' insns are unspecs only, making 
them impossible to see through.

We can get this optimal right from expand though, given that our handler 
for "insvmisaligndi", i.e. `alpha_expand_unaligned_store', has explicit 
provisions for `const0_rtx' source.

gcc/
* config/alpha/alpha.md (insvmisaligndi): Use "reg_or_0_operand" 
rather than "register_operand" for operand 3.

gcc/testsuite/
* gcc.target/alpha/stlx0.c: New file.
* gcc.target/alpha/stqx0.c: New file.
* gcc.target/alpha/stwx0.c: New file.
* gcc.target/alpha/stwx0-bwx.c: New file.
---
 gcc/config/alpha/alpha.md  |2 +-
 gcc/testsuite/gcc.target/alpha/stlx0.c |   28 
 gcc/testsuite/gcc.target/alpha/stqx0.c |   28 
 gcc/testsuite/gcc.target/alpha/stwx0-bwx.c |   19 +++
 gcc/testsuite/gcc.target/alpha/stwx0.c |   28 
 5 files changed, 104 insertions(+), 1 deletion(-)

gcc-alpha-insvmisaligndi-zero.diff
Index: gcc/gcc/config/alpha/alpha.md
===
--- gcc.orig/gcc/config/alpha/alpha.md
+++ gcc/gcc/config/alpha/alpha.md
@@ -4626,7 +4626,7 @@
   [(set (zero_extract:DI (match_operand:BLK 0 "memory_operand")
 (match_operand:DI 1 "const_int_operand")
 (match_operand:DI 2 "const_int_operand"))
-   (match_operand:DI 3 "register_operand"))]
+   (match_operand:DI 3 "reg_or_0_operand"))]
   ""
 {
   /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries.  */
Index: gcc/gcc/testsuite/gcc.target/alpha/stlx0.c
===
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/alpha/stlx0.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+typedef struct { int v __attribute__ ((packed)); } intx;
+
+void
+stlx0 (intx *p)
+{
+  p->v = 0;
+}
+
+/* Expect assembly such as:
+
+   ldq_u $2,3($16)
+   ldq_u $1,0($16)
+   msklh $2,$16,$2
+   stq_u $2,3($16)
+   mskll $1,$16,$1
+   stq_u $1,0($16)
+
+   without any INSLH, INSLL, or BIS instructions.  */
+
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 2 } } */
+/* { dg-final { scan-assembler-times "\\smsklh\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\smskll\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq_u\\s" 2 } } */
+/* { dg-final { scan-assembler-not "\\s(?:bis|inslh|insll)\\s" } } */
Index: gcc/gcc/testsuite/gcc.target/alpha/stqx0.c
===
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/alpha/stqx0.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+typedef struct { long v __attribute__ ((packed)); } longx;
+
+void
+stqx0 (longx *p)
+{
+  p->v = 0;
+}
+
+/* Expect assembly such as:
+
+   ldq_u $2,7($16)
+   ldq_u $1,0($16)
+   mskqh $2,$16,$2
+   stq_u $2,7($16)
+   mskql $1,$16,$1
+   stq_u $1,0($16)
+
+   without any INSQH, INSQL, or BIS instructions.  */
+
+/* { dg-final { scan-assembler-times "\\sldq_u\\s" 2 } } */
+/* { dg-final { scan-assembler-times "\\smskqh\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\smskql\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq_u\\s" 2 } } */
+/* { dg-final { scan-assembler-not "\\s(?:bis|insqh|insql)\\s" } } */
Index: gcc/gcc/testsuite/gcc.target/alpha/stwx0-bwx.c
===
--- /dev/null
+++ gcc/gcc/testsuite/gcc.target/alpha/stwx0-bwx.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mbwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+typedef struct { short v __attribute__ ((packed)); } shortx;
+
+void
+stwx0 (shortx *p)
+{
+  p->v = 0;
+}
+
+/* Expect as

Re: [PATCH v2 2/4] vect: disable multiple calls of poly simdclones

2024-11-19 Thread Richard Biener
On Mon, 18 Nov 2024, Victor Do Nascimento wrote:

> On 11/5/24 07:39, Richard Biener wrote:
> > On Tue, 5 Nov 2024, Victor Do Nascimento wrote:
> > 
> >> The current codegen code to support VF's that are multiples of a simdclone
> >> simdlen relies on BIT_FIELD_REF to create multiple input vectors.  This does
> >> not
> >> work for non-constant simdclones, so we should disable using such clones
> >> when
> >> the VF is a multiple of the non-constant simdlen until we change the
> >> codegen to
> >> support those.
> > 
> > ISTR BIT_FIELD_REF now uses poly-int offset and size so what breaks
> > here?  I don't see any other way that such BIT_FIELD_REFs to represent
> > hi/lo part accesses?
> > 
> > Richard.
> 
> Upon further investigation, while you are right that BIT_FIELD_REF does
> use poly-int types for both offsets and sizes, much of the expand code
> does not know how to deal with variable-len vectors.
> 
> One such example of this is in `store_constructor_field' where, after
> having called our simdclone fn twice passing it the high and low parts
> of an SVE vec as an argument, we try to concatenate the returned
> subvectors back together.
> 
> In `store_constructor', for example, many updates from integer operators
> to their poly_int64 counterparts are needed (though this is trivial) and
> in other parts new logic is needed altogether.
> 
> One example of this can be seen in `store_bit_field_1', where you see
> the following statements:
> 
>   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
>   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
> 
> which will ICE for variable-length bitsizes and bitnums and which guard
> the subsequent section of code from trying to handle non-const lens.

But since BIT_FIELD_REFs with poly-int offset/size are now a thing
we have to fix all those places.  Or revert back to not allowing
them.

We at least need to guard the problematic code appropriately.

> Therefore, while I believe that the full support for poly-int
> BIT_FIELD_REFs is appropriate and something that ought to be done, the
> 
>   if (!n->simdclone->simdlen.is_constant () && num_calls != 1)
> 
> guard in this patch is, at least for now, strictly necessary and can be
> readily removed once work on BIT_FIELD_REF is completed.

GCC's history tells us this will never happen ;)  Given this is for
a new feature I insist you try a bit harder and fixup the places that
ICE (if only by guarding them with .is_constant ()).  Those parts will
be helpful even if in the end you don't succeed with bug-squashing.

Richard.

> Regards,
> Victor
> 
> >> gcc/ChangeLog:
> >>
> >>  * tree-vect-stmts.cc (vectorizable_simd_clone_call): Reject simdclones
> >>  with non-constant simdlen when VF is not exactly the same.
> >> ---
> >>   gcc/tree-vect-stmts.cc | 5 -
> >>   1 file changed, 4 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> >> index 2d0da6f0a0e..961421fee25 100644
> >> --- a/gcc/tree-vect-stmts.cc
> >> +++ b/gcc/tree-vect-stmts.cc
> >> @@ -4149,7 +4149,10 @@ vectorizable_simd_clone_call (vec_info *vinfo,
> >> stmt_vec_info stmt_info,
> >>if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
> >> &num_calls)
> >>|| (!n->simdclone->inbranch && (masked_call_offset > 0))
> >> -  || (nargs != simd_nargs))
> >> +  || (nargs != simd_nargs)
> >> +  /* Currently we do not support multiple calls of non-constant
> >> + simdlen as poly vectors can not be accessed by BIT_FIELD_REF.
> >> */
> >> +  || (!n->simdclone->simdlen.is_constant () && num_calls != 1))
> >>  continue;
> >>if (num_calls != 1)
> >>  this_badness += floor_log2 (num_calls) * 4096;
> >>
> > 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


[PATCH 14/17] testsuite: arm: Use -march=unset for pr69175.C test

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* g++.dg/opt/pr69175.C: Added option "-mcpu=unset".

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/g++.dg/opt/pr69175.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/opt/pr69175.C 
b/gcc/testsuite/g++.dg/opt/pr69175.C
index e24f6816b5f..6d28951d5ae 100644
--- a/gcc/testsuite/g++.dg/opt/pr69175.C
+++ b/gcc/testsuite/g++.dg/opt/pr69175.C
@@ -1,7 +1,7 @@
 // PR target/69175
 // { dg-do compile }
 // { dg-options "-O2" }
-// { dg-additional-options "-march=armv7-a -mfloat-abi=hard -mfpu=vfpv3-d16 
-mthumb" { target { arm_hard_vfp_ok && arm_thumb2_ok } } }
+// { dg-additional-options "-mcpu=unset -march=armv7-a -mfloat-abi=hard 
-mfpu=vfpv3-d16 -mthumb" { target { arm_hard_vfp_ok && arm_thumb2_ok } } }
 
 struct A { A *c, *d; } a;
 struct B { A *e; A *f; void foo (); };
-- 
2.25.1



[PATCH 02/17] testsuite: arm: Use effective-target for pacbti-m-predef* tests

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/acle/pacbti-m-predef-1.c: Use effective-target
arm_arch_v8_1m_main.
* gcc.target/arm/acle/pacbti-m-predef-2.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-3.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-4.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-5.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-6.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-8.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-9.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-10.c: Likewise.
* gcc.target/arm/acle/pacbti-m-predef-11.c: Likewise.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-1.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-10.c | 4 +++-
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-2.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-3.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-4.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-5.c  | 5 +++--
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-6.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-8.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-9.c  | 4 +++-
 9 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-1.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-1.c
index 122f7a762a7..0abc3372149 100644
--- a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-1.c
+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-1.c
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
 /* { dg-require-effective-target mbranch_protection_ok } */
-/* { dg-options "-march=armv8.1-m.main+fp -mbranch-protection=pac-ret+bti 
-mfloat-abi=hard --save-temps" } */
+/* { dg-options "-mbranch-protection=pac-ret+bti -mfloat-abi=hard 
--save-temps" } */
+/* { dg-add-options arm_arch_v8_1m_main } */
 
 #if !defined (__ARM_FEATURE_BTI_DEFAULT)
 #error "Feature test macro __ARM_FEATURE_BTI_DEFAULT should be defined."
diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-10.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-10.c
index 52d18238109..08f90a9d9c1 100644
--- a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-10.c
+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-10.c
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
 /* { dg-require-effective-target mbranch_protection_ok } */
-/* { dg-additional-options "-march=armv8.1-m.main+fp 
-mbranch-protection=bti+pac-ret -mfloat-abi=hard" } */
+/* { dg-additional-options "-mbranch-protection=bti+pac-ret -mfloat-abi=hard" 
} */
+/* { dg-add-options arm_arch_v8_1m_main } */
 
 #if (__ARM_FEATURE_BTI_DEFAULT != 1)
 #error "Feature test macro __ARM_FEATURE_BTI_DEFAULT should be defined to 1."
diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-2.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-2.c
index cd418ce0c7f..de34cc87f99 100644
--- a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-2.c
+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-2.c
@@ -1,7 +1,9 @@
 /* { dg-do run } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_link } */
 /* { dg-require-effective-target mbranch_protection_ok } */
 /* { dg-require-effective-target arm_pacbti_hw } */
-/* { dg-options "-march=armv8.1-m.main+pacbti+fp 
-mbranch-protection=bti+pac-ret+leaf -mthumb -mfloat-abi=hard" } */
+/* { dg-options "-mbranch-protection=bti+pac-ret+leaf -mfloat-abi=hard" } */
+/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
 
 #if !defined (__ARM_FEATURE_BTI_DEFAULT)
 #error "Feature test macro __ARM_FEATURE_BTI_DEFAULT should be defined."
diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-3.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-3.c
index b94f3447ad9..e19e41626dd 100644
--- a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-3.c
+++ b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-3.c
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_ok } */
 /* { dg-require-effective-target mbranch_protection_ok } */
-/* { dg-options "-march=armv8.1-m.main+fp -mbranch-protection=pac-ret+leaf 
-mfloat-abi=hard --save-temps" } */
+/* { dg-options "-mbranch-protection=pac-ret+leaf -mfloat-abi=hard 
--save-temps" } */
+/* { dg-add-options arm_arch_v8_1m_main } */
 
 #if defined (__ARM_FEATURE_BTI_DEFAULT)
 #error "Feature test macro __ARM_FEATURE_BTI_DEFAULT should be undefined."
diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-4.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-4.c
index ce4b45ab464..9040b0a6464 100644
--- a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-4.c
+++ b/gcc/testsuite/gc

[PATCH v1 1/2] RISC-V: Fix incorrect optimization options passing to strided ld/st test

2024-11-19 Thread pan2 . li
From: Pan Li 

The testcases of vector strided load/store are designed to pick up
different sorts of optimization options but actually these option
are ignored according to the Execution log of gcc.log.  This patch
would like to make it correct, and then you will see the build option
similar as below from the gcc.log.

Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=scalable 
-mrvv-max-lmul=m1 ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m1 
...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=scalable 
-mrvv-max-lmul=m4 ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=scalable 
-mrvv-max-lmul=m8 ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=zvl 
-mrvv-max-lmul=dynamic ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m8 
...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4 
...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=scalable 
-mrvv-max-lmul=m2 ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=scalable 
-mrvv-max-lmul=dynamic ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2 
...

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/rvv.exp: Fix the incorrect optimization options.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp 
b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
index 12002dd51bf..dbe1f11c0e8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
+++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
@@ -121,7 +121,7 @@ foreach op $AUTOVEC_TEST_OPTS {
   dg-runtest [lsort [glob -nocomplain 
$srcdir/$subdir/autovec/gather-scatter/*.\[cS\]]] \
 "" "$op"
   dg-runtest [lsort [glob -nocomplain 
$srcdir/$subdir/autovec/strided/*.\[cS\]]] \
-"" "$op"
+"$op" ""
 }
 
 # All done.
-- 
2.43.0



Stage1 patch ping

2024-11-19 Thread Jakub Jelinek
Hi!

I'd like to ping various stage1 patches.

Padding zeroing patchset
========================

https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665565.html
  expr, c, gimplify: Don't clear whole unions [PR116416]
  This one needs C FE review (especially if the testcase matches
  the actual C23 behavior) + middle-end

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/668065.html
  c++: Small initial fixes for zeroing of padding bits [PR117256]
  This one has been approved already but including here for completeness,
  as it depends on the previous patch.

C2Y N3349, Abs Without Undefined Behavior patch
===============================================

https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665651.html
  c: Add u{,l,ll,imax}abs builtins [PR117024]
  This patch is actually purely middle-end, so maybe doesn't need
  C FE review (except perhaps to verify that the patch is actually useful
  for C2Y stdlib.h and inttypes.h)

Toplevel Extended Asm support
=============================

I think all of these need primarily C/C++ FE review, Richi already
commented on them from middle-end POV

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/667276.html
  Allow limited extended asm at toplevel [PR41045]

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/667735.html
  inline asm: Add new constraint for symbol definitions

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/667737.html
  inline-asm: Add support for cc operand modifier

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/669272.html
  inline-asm: Add - constraint modifier support for toplevel extended asm 
[PR41045]

Inline Asm redzone clobber
==========================

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/667949.html
  inline-asm, i386: Add "redzone" clobber support
  This one needs primarily C-family FE review, Uros already acked the
  i386 part

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/667970.html
  rs6000: Add PowerPC inline asm redzone clobber support
  And this one needs PowerPC review but is dependent on the previous
  patch

C++ dynamic initialization into static initialization optimization
==================================================================

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/668303.html
  c++, dyninit: Optimize C++ dynamic initialization by constants into 
DECL_INITIAL adjustment [PR102876]
  This one needs primarily middle-end review, but some C++ FE too

C2Y N3322, Allow zero length operations on null pointers patchset
=================================================================

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/668554.html
  Add support for nonnull_if_nonzero attribute [PR117023]
  This one awaits C-family and middle-end review

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/668699.html
  analyzer: Handle nonnull_if_nonzero attribute [PR117023]
  This one awaits analyzer review (and perhaps further work)

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/668700.html
  ranger: Handle nonnull_if_nonzero attribute [PR117023]
  This is the minimal patch to get things working, I think Andrew is
  fine with this version and has incremental patches to improve

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/668711.html
  Use nonnull_if_nonzero attribute rather than nonnull on various builtins 
[PR117023]
  This one is purely middle-end patch, but perhaps some C FE feedback
  whether that is what C2Y wants is helpful

Patch for ignoring of inline/constexpr keywords or defining methods
in classes for optimization purposes
===================================================================

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/668798.html
  Introduce feeble_inline attribute [PR93008]
  With the suggested s/DECL_OPTIMIZABLE_INLINE_P/DECL_AGGRESSIBLE_INLINE_P/
  This needs C/C++ FE review and middle-end review

Improve optimizations of ::operator {new,delete}{,[]} patch
===========================================================

https://gcc.gnu.org/pipermail/gcc-patches/2024-November/668986.html
  Add -f{,no-}assume-sane-operators-new-delete{,={0,1,2}} options [PR110137]
  This needs middle-end review and perhaps C++ guidance

Thanks

Jakub



[RFC PATCH] cselib: Reuse VALUEs on reg adjustments

2024-11-19 Thread Bohan Lei
The commit 2c0fa3ecf70d199af18785702e9e0548fd3ab793 reuses VALUEs on sp
adjustments.  We can generalize the idea and reuse VALUEs on other
registers.  This can help the postreload pass find more opportunities to
simplify insns.

The following assembly code is generated from the testcase using the current
trunk compiler:

.L5:
        movq    %rbp, %rsi
        movq    %rbx, %rdi
        call    l
        movq    %rbx, %rsi
        addq    $4, %rbx
        testl   %eax, %eax
        je      .L6
        leaq    -4(%rbx), %rax
        cmpq    %rax, %rbp
        je      .L6
        movq    %rbx, %rdi
        call    as

The lea instruction is actually redundant here, as rsi contains the same
value as rbx-4 and can be used in the cmp instruction instead of rax.
With this patch, the lea instruction can be eliminated in the postreload
pass.

Bootstrapped and regtested on x86_64-pc-linux-gnu, no regressions.
---
 gcc/cselib.cc| 29 ++--
 gcc/rtl.h|  1 +
 gcc/testsuite/gcc.target/i386/cselib-1.c | 22 ++
 3 files changed, 40 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/cselib-1.c

diff --git a/gcc/cselib.cc b/gcc/cselib.cc
index e6a36e892bb..43ab266c4de 100644
--- a/gcc/cselib.cc
+++ b/gcc/cselib.cc
@@ -70,6 +70,9 @@ static rtx autoinc_split (rtx, rtx *, machine_mode);
 #define SP_DERIVED_VALUE_P(RTX) \
   (RTL_FLAG_CHECK1 ("SP_DERIVED_VALUE_P", (RTX), VALUE)->call)
 
+#define REG_DERIVED_VALUE_P(RTX) \
+  (RTL_FLAG_CHECK1 ("REG_DERIVED_VALUE_P", (RTX), VALUE)->volatil)
+
 struct expand_value_data
 {
   bitmap regs_active;
@@ -137,7 +140,7 @@ cselib_hasher::equal (const cselib_val *v, const key *x_arg)
   if (GET_CODE (x) == VALUE)
 return x == v->val_rtx;
 
-  if (SP_DERIVED_VALUE_P (v->val_rtx) && GET_MODE (x) == Pmode)
+  if (REG_DERIVED_VALUE_P (v->val_rtx) && GET_MODE (x) == Pmode)
 {
   rtx xoff = NULL;
   if (autoinc_split (x, &xoff, memmode) == v->val_rtx && xoff == NULL_RTX)
@@ -905,7 +908,7 @@ autoinc_split (rtx x, rtx *off, machine_mode memmode)
e = cselib_lookup (x, GET_MODE (x), 0, memmode);
   if (e)
{
- if (SP_DERIVED_VALUE_P (e->val_rtx)
+ if (REG_DERIVED_VALUE_P (e->val_rtx)
  && (*off == NULL_RTX || *off == const0_rtx))
{
  *off = NULL_RTX;
@@ -914,7 +917,7 @@ autoinc_split (rtx x, rtx *off, machine_mode memmode)
  for (struct elt_loc_list *l = e->locs; l; l = l->next)
if (GET_CODE (l->loc) == PLUS
&& GET_CODE (XEXP (l->loc, 0)) == VALUE
-   && SP_DERIVED_VALUE_P (XEXP (l->loc, 0))
+   && REG_DERIVED_VALUE_P (XEXP (l->loc, 0))
&& CONST_INT_P (XEXP (l->loc, 1)))
  {
if (*off == NULL_RTX)
@@ -971,8 +974,8 @@ rtx_equal_for_cselib_1 (rtx x, rtx y, machine_mode memmode, 
int depth)
   if (GET_CODE (y) == VALUE)
return e == canonical_cselib_val (CSELIB_VAL_PTR (y));
 
-  if ((SP_DERIVED_VALUE_P (x)
-  || SP_DERIVED_VALUE_P (e->val_rtx))
+  if ((REG_DERIVED_VALUE_P (x)
+  || REG_DERIVED_VALUE_P (e->val_rtx))
  && GET_MODE (y) == Pmode)
{
  rtx yoff = NULL;
@@ -1004,8 +1007,8 @@ rtx_equal_for_cselib_1 (rtx x, rtx y, machine_mode 
memmode, int depth)
   cselib_val *e = canonical_cselib_val (CSELIB_VAL_PTR (y));
   struct elt_loc_list *l;
 
-  if ((SP_DERIVED_VALUE_P (y)
-  || SP_DERIVED_VALUE_P (e->val_rtx))
+  if ((REG_DERIVED_VALUE_P (y)
+  || REG_DERIVED_VALUE_P (e->val_rtx))
  && GET_MODE (x) == Pmode)
{
  rtx xoff = NULL;
@@ -1257,11 +1260,11 @@ cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int 
create,
   if (! e)
 return 0;
 
-  if (! SP_DERIVED_VALUE_P (e->val_rtx))
+  if (! REG_DERIVED_VALUE_P (e->val_rtx))
 for (struct elt_loc_list *l = e->locs; l; l = l->next)
   if (GET_CODE (l->loc) == PLUS
  && GET_CODE (XEXP (l->loc, 0)) == VALUE
- && SP_DERIVED_VALUE_P (XEXP (l->loc, 0))
+ && REG_DERIVED_VALUE_P (XEXP (l->loc, 0))
  && CONST_INT_P (XEXP (l->loc, 1)))
{
  e = CSELIB_VAL_PTR (XEXP (l->loc, 0));
@@ -2233,13 +2236,13 @@ cselib_subst_to_values (rtx x, machine_mode memmode)
  rtx t = cselib_subst_to_values (XEXP (x, 0), memmode);
  if (GET_CODE (t) == VALUE)
{
- if (SP_DERIVED_VALUE_P (t) && XEXP (x, 1) == const0_rtx)
+ if (REG_DERIVED_VALUE_P (t) && XEXP (x, 1) == const0_rtx)
return t;
  for (struct elt_loc_list *l = CSELIB_VAL_PTR (t)->locs;
   l; l = l->next)
if (GET_CODE (l->loc) == PLUS
&& GET_CODE (XEXP (l->loc, 0)) == VALUE
-   && SP_DERIVED_VALUE_P (XEXP (l->loc, 0))
+   && REG_DERIVED_VALUE_P (XEXP (l->loc, 0))
&

[PATCH 01/17] testsuite: arm: Use effective-target for bti* and pac* tests

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pac-1.c: Use effective-target
arm_arch_v8_1m_main_pacbti.
* gcc.target/arm/pac-1.c: Likewise.
* gcc.target/arm/pac-2.c: Likewise.
* gcc.target/arm/pac-3.c: Likewise.
* gcc.target/arm/pac-4.c: Likewise.
* gcc.target/arm/pac-5.c: Likewise.
* gcc.target/arm/pac-7.c: Likewise.
* gcc.target/arm/pac-8.c: Likewise.
* gcc.target/arm/pac-9.c: Likewise.
* gcc.target/arm/pac-10.c: Likewise.
* gcc.target/arm/pac-11.c: Likewise.
* gcc.target/arm/pac-sibcall.c: Likewise.
* gcc.target/arm/pac-sibcall-2.c: Likewise.
* gcc.target/arm/pac-sibcall-3.c: Likewise.
* gcc.target/arm/pac-12.c: Added option "-mcpu=unset".
* gcc.target/arm/pac-13.c: Likewise.
* gcc.target/arm/pac-14.c: Likewise.
* lib/target-supports.exp(check_effective_target_arm_pacbti_hw):
Likewise.
* gcc.target/arm/pac-6.c: Use effective-target
arm_arch_v8_1m_main.
* gcc.target/arm/pac-15.c: Use effective-target
arm_arch_v8_1m_main_pacbti and added option "-mcpu=unset".

Signed-off-by: Torbjörn SVENSSON 
Co-authored-by: Yvan ROUX 
---
 gcc/testsuite/gcc.target/arm/pac-1.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/pac-10.c | 4 +++-
 gcc/testsuite/gcc.target/arm/pac-11.c | 4 +++-
 gcc/testsuite/gcc.target/arm/pac-12.c | 2 +-
 gcc/testsuite/gcc.target/arm/pac-13.c | 2 +-
 gcc/testsuite/gcc.target/arm/pac-14.c | 2 +-
 gcc/testsuite/gcc.target/arm/pac-15.c | 4 ++--
 gcc/testsuite/gcc.target/arm/pac-2.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/pac-3.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/pac-4.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/pac-5.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/pac-6.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/pac-7.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/pac-8.c  | 4 +++-
 gcc/testsuite/gcc.target/arm/pac-9.c  | 4 +++-
 gcc/testsuite/lib/target-supports.exp | 2 +-
 16 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pac-1.c 
b/gcc/testsuite/gcc.target/arm/pac-1.c
index e0eea0858e0..a2bec355214 100644
--- a/gcc/testsuite/gcc.target/arm/pac-1.c
+++ b/gcc/testsuite/gcc.target/arm/pac-1.c
@@ -1,8 +1,10 @@
 /* Testing return address signing.  */
 /* { dg-do run } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_link } */
 /* { dg-require-effective-target mbranch_protection_ok } */
 /* { dg-require-effective-target arm_pacbti_hw } */
-/* { dg-options "-march=armv8.1-m.main+pacbti+fp 
-mbranch-protection=pac-ret+leaf -mthumb -mfloat-abi=hard --save-temps -O0" } */
+/* { dg-options "-mbranch-protection=pac-ret+leaf -mfloat-abi=hard 
--save-temps -O0" } */
+/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
 
 #include "pac.h"
 
diff --git a/gcc/testsuite/gcc.target/arm/pac-10.c 
b/gcc/testsuite/gcc.target/arm/pac-10.c
index 6da8434aeaf..0882dad7406 100644
--- a/gcc/testsuite/gcc.target/arm/pac-10.c
+++ b/gcc/testsuite/gcc.target/arm/pac-10.c
@@ -1,7 +1,9 @@
 /* Testing return address signing.  */
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_ok } */
 /* { dg-require-effective-target mbranch_protection_ok } */
-/* { dg-options "-march=armv8.1-m.main+pacbti+fp -mbranch-protection=pac-ret 
-mthumb -mfloat-abi=hard --save-temps -O0" } */
+/* { dg-options "-mbranch-protection=pac-ret -mfloat-abi=hard --save-temps 
-O0" } */
+/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
 
 #include "pac.h"
 
diff --git a/gcc/testsuite/gcc.target/arm/pac-11.c 
b/gcc/testsuite/gcc.target/arm/pac-11.c
index 0bb727c2c80..32685a726b7 100644
--- a/gcc/testsuite/gcc.target/arm/pac-11.c
+++ b/gcc/testsuite/gcc.target/arm/pac-11.c
@@ -1,7 +1,9 @@
 /* Testing return address signing.  */
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_ok } */
 /* { dg-require-effective-target mbranch_protection_ok } */
-/* { dg-options "-march=armv8.1-m.main+pacbti+fp 
-mbranch-protection=bti+pac-ret+leaf -mthumb -mfloat-abi=hard --save-temps -O2" 
} */
+/* { dg-options "-mbranch-protection=bti+pac-ret+leaf -mfloat-abi=hard 
--save-temps -O2" } */
+/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
 
 #include "pac.h"
 
diff --git a/gcc/testsuite/gcc.target/arm/pac-12.c 
b/gcc/testsuite/gcc.target/arm/pac-12.c
index 6e1295c834d..37bf0047c2e 100644
--- a/gcc/testsuite/gcc.target/arm/pac-12.c
+++ b/gcc/testsuite/gcc.target/arm/pac-12.c
@@ -2,6 +2,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target arm_pacbti_hw } */
 /* { dg-skip-if "need fp instructions" { *-*-* } { "" } { "-mfloat-abi=hard" } 
} */
-/* { dg-options "-march=armv8.1-m.main+dsp+fp.dp+pacbti 
-mbranch-protection=standard -mthumb -mfloat-abi=hard" } */
+/* { dg-options "-mcpu=unset -march=armv8.1-m.main+dsp+fp.dp+pacbti 
-mbranch-protection=standard -mthumb -mfloat-a

[PATCH 05/17] testsuite: arm: Use effective-target for small-multiply-m* tests

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/small-multiply-m0-1.c: Use effective-target
arm_arch_v6m and added option "-march=unset".
* gcc.target/arm/small-multiply-m0-2.c: Likewise.
* gcc.target/arm/small-multiply-m0-3.c: Likewise.
* gcc.target/arm/small-multiply-m0plus-1.c: Likewise.
* gcc.target/arm/small-multiply-m0plus-2.c: Likewise.
* gcc.target/arm/small-multiply-m0plus-3.c: Likewise.
* gcc.target/arm/small-multiply-m1-1.c: Likewise.
* gcc.target/arm/small-multiply-m1-2.c: Likewise.
* gcc.target/arm/small-multiply-m1-3.c: Likewise.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c | 4 ++--
 gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c | 4 ++--
 gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c | 4 ++--
 gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c | 4 ++--
 gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c | 4 ++--
 gcc/testsuite/gcc.target/arm/small-multiply-m0plus-3.c | 4 ++--
 gcc/testsuite/gcc.target/arm/small-multiply-m1-1.c | 4 ++--
 gcc/testsuite/gcc.target/arm/small-multiply-m1-2.c | 4 ++--
 gcc/testsuite/gcc.target/arm/small-multiply-m1-3.c | 4 ++--
 9 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c 
b/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
index 52c652c1cba..c62e2f97ade 100644
--- a/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
+++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v6m_ok } */
 /* { dg-require-effective-target arm_thumb1_ok } */
-/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { 
"-mcpu=cortex-m0.small-multiply" } } */
-/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -O2" } */
+/* { dg-options "-march=unset -mcpu=cortex-m0.small-multiply -mthumb -O2" } */
 
 int
 test (int a)
diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c 
b/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
index 10d49e9eace..a9e076b0e60 100644
--- a/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
+++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v6m_ok } */
 /* { dg-require-effective-target arm_thumb1_ok } */
-/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { 
"-mcpu=cortex-m0.small-multiply" } } */
-/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -Os" } */
+/* { dg-options "-march=unset -mcpu=cortex-m0.small-multiply -mthumb -Os" } */
 
 int
 test (int a)
diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c 
b/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
index b4af511af86..973c78aee37 100644
--- a/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
+++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v6m_ok } */
 /* { dg-require-effective-target arm_thumb1_ok } */
-/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { 
"-mcpu=cortex-m0.small-multiply" } } */
-/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -Os" } */
+/* { dg-options "-march=unset -mcpu=cortex-m0.small-multiply -mthumb -Os" } */
 
 int
 test (int a)
diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c 
b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
index 59dba7cf4ab..53e68ae1364 100644
--- a/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
+++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v6m_ok } */
 /* { dg-require-effective-target arm_thumb1_ok } */
-/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { 
"-mcpu=cortex-m0plus.small-multiply" } } */
-/* { dg-options "-mcpu=cortex-m0plus.small-multiply -mthumb -O2" } */
+/* { dg-options "-march=unset -mcpu=cortex-m0plus.small-multiply -mthumb -O2" 
} */
 
 int
 test (int a)
diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c 
b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c
index 685ef440776..19f941dd1b7 100644
--- a/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c
+++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v6m_ok } */
 /* { dg-require-effective-target arm_thumb1_ok } */
-/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { 
"-mcpu=cortex-m0plus.small-multiply" } } */
-/* { dg-options "-mcpu=cortex-m0plus.small-multiply -mthumb -Os" } */
+/* { dg-options "-march=unset -mcpu=cortex-m0plus.small-multiply -mthumb -Os" 
} */
 
 int
 test (int a)
diff --git a/

[PATCH 08/17] testsuite: arm: Use effective-target for vect-early-break-cbranch test

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/vect-early-break-cbranch.c: Use
effective-target arm_arch_v8a_hard.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/vect-early-break-cbranch.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/vect-early-break-cbranch.c 
b/gcc/testsuite/gcc.target/arm/vect-early-break-cbranch.c
index 334e064a239..fb12bfb3197 100644
--- a/gcc/testsuite/gcc.target/arm/vect-early-break-cbranch.c
+++ b/gcc/testsuite/gcc.target/arm/vect-early-break-cbranch.c
@@ -2,7 +2,9 @@
 /* { dg-require-effective-target vect_early_break } */
 /* { dg-require-effective-target arm_neon_ok } */
 /* { dg-require-effective-target arm32 } */
-/* { dg-options "-O3 -march=armv8-a+simd -mfpu=auto -mfloat-abi=hard  
-fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2" } */
+/* { dg-require-effective-target arm_arch_v8a_hard_ok } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks 
-fno-schedule-insns2" } */
+/* { dg-add-options arm_arch_v8a_hard } */
 /* { dg-final { check-function-bodies "**" "" "" } } */
 
 #define N 640
-- 
2.25.1



[PATCH 06/17] testsuite: arm: Use effective-target for thumb2-slow-flash-data* tests

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/thumb2-slow-flash-data-2.c: Use
effective-target arm_arch_v7em and added option "-march=unset
-mfpu=auto".
* gcc.target/arm/thumb2-slow-flash-data-3.c: Likewise.
* gcc.target/arm/thumb2-slow-flash-data-4.c: Likewise.
* gcc.target/arm/thumb2-slow-flash-data-5.c: Likewise.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c | 7 +++
 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c | 7 +++
 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c | 7 +++
 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-5.c | 7 +++
 4 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c 
b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c
index 231243759cf..581e510762a 100644
--- a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c
+++ b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c
@@ -1,11 +1,10 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target arm_cortex_m } */
 /* { dg-require-effective-target arm_thumb2_ok } */
 /* { dg-require-effective-target arm_fp_ok } */
-/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { "-mcpu=*" 
} { "-mcpu=cortex-m4" "-mcpu=cortex-m7" } } */
-/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } { 
"-mfloat-abi=hard" } } */
+/* { dg-require-effective-target arm_arch_v7em_ok } */
 /* { dg-skip-if "-mslow-flash-data and -mword-relocations incompatible" { 
*-*-* } { "-mword-relocations" } } */
-/* { dg-options "-march=armv7e-m+fp -mfloat-abi=hard -O2 -mthumb 
-mslow-flash-data" } */
+/* { dg-options "-mfloat-abi=hard -mfpu=auto -O2 -mslow-flash-data" } */
+/* { dg-add-options arm_arch_v7em } */
 
 float f (float);
 
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c 
b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c
index 27e72ec2086..b5f4c0d7d6e 100644
--- a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c
+++ b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c
@@ -1,11 +1,10 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target arm_cortex_m } */
 /* { dg-require-effective-target arm_thumb2_ok } */
 /* { dg-require-effective-target arm_fp_ok } */
-/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { "-mcpu=*" 
} { "-mcpu=cortex-m4" "-mcpu=cortex-m7" } } */
-/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } { 
"-mfloat-abi=hard" } } */
+/* { dg-require-effective-target arm_arch_v7em_ok } */
 /* { dg-skip-if "-mslow-flash-data and -mword-relocations incompatible" { 
*-*-* } { "-mword-relocations" } } */
-/* { dg-options "-march=armv7e-m+fp -mfloat-abi=hard -mthumb 
-mslow-flash-data" } */
+/* { dg-options "-mfloat-abi=hard -mfpu=auto -mslow-flash-data" } */
+/* { dg-add-options arm_arch_v7em } */
 
 /* From PR71607 */
 
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c 
b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c
index 8dbe87a1e68..a4d3846996e 100644
--- a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c
+++ b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c
@@ -1,11 +1,10 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target arm_cortex_m } */
 /* { dg-require-effective-target arm_thumb2_ok } */
 /* { dg-require-effective-target arm_fp_ok } */
-/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { "-mcpu=*" 
} { "-mcpu=cortex-m4" "-mcpu=cortex-m7" } } */
-/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } { 
"-mfloat-abi=hard" } } */
+/* { dg-require-effective-target arm_arch_v7em_ok } */
 /* { dg-skip-if "-mslow-flash-data and -mword-relocations incompatible" { 
*-*-* } { "-mword-relocations" } } */
-/* { dg-options "-march=armv7e-m+fp -mfloat-abi=hard -O2 -mthumb 
-mslow-flash-data" } */
+/* { dg-options "-mfloat-abi=hard -mfpu=auto -O2 -mslow-flash-data" } */
+/* { dg-add-options arm_arch_v7em } */
 
 double __attribute__ ((target ("fpu=fpv5-d16")))
 foo (void)
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-5.c 
b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-5.c
index b98eb7624e4..0fcfb65c5cd 100644
--- a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-5.c
+++ b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-5.c
@@ -1,11 +1,10 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target arm_cortex_m } */
 /* { dg-require-effective-target arm_thumb2_ok } */
 /* { dg-require-effective-target arm_fp_ok } */
-/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { "-mcpu=*" 
} { "-mcpu=cortex-m4" "-mcpu=cortex-m7" } } */
-/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } { 
"-mfloat-abi=hard" } } */
+/* { dg-require-effective-target arm_arch_v7em_ok } */
 /* { dg-

[PATCH 13/17] testsuite: arm: Use -march=unset for cortex-m55* tests

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/cortex-m55-nodsp-flag-hard.c: Added option
"-march=unset".
* gcc.target/arm/cortex-m55-nodsp-flag-softfp.c: Likewise.
* gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c: Likewise.
* gcc.target/arm/cortex-m55-nofp-flag-hard.c: Likewise.
* gcc.target/arm/cortex-m55-nofp-flag-softfp.c: Likewise.
* gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c: Likewise.
* gcc.target/arm/cortex-m55-nomve-flag-hard.c: Likewise.
* gcc.target/arm/cortex-m55-nomve-flag-softfp.c: Likewise.
* gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c: Likewise.
* gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c: Likewise.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c   | 2 +-
 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c | 2 +-
 .../gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c  | 2 +-
 gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c| 2 +-
 gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-softfp.c  | 2 +-
 .../gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c  | 2 +-
 gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-hard.c   | 2 +-
 gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-softfp.c | 2 +-
 gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c| 2 +-
 gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c  | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c 
b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c
index 92c15112ae7..9810e28f58d 100644
--- a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c
+++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c
@@ -1,6 +1,6 @@
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
-/* { dg-additional-options "-mcpu=cortex-m55+nodsp -mthumb -mfloat-abi=hard 
-mfpu=auto --save-temps" } */
+/* { dg-additional-options "-march=unset -mcpu=cortex-m55+nodsp -mthumb 
-mfloat-abi=hard -mfpu=auto --save-temps" } */
 /* { dg-final { scan-assembler "\.arch_extension fp" } } */
 /* { dg-final { scan-assembler "\.arch_extension fp.dp" } } */
 /* { dg-final { scan-assembler-not "\.arch_extension dsp" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c 
b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c
index 89d778f8ecb..cc92d8ccc60 100644
--- a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c
+++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c
@@ -1,6 +1,6 @@
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
-/* { dg-additional-options "-mcpu=cortex-m55+nodsp -mthumb -mfloat-abi=softfp 
-mfpu=auto --save-temps" } */
+/* { dg-additional-options "-march=unset -mcpu=cortex-m55+nodsp -mthumb 
-mfloat-abi=softfp -mfpu=auto --save-temps" } */
 /* { dg-final { scan-assembler "\.arch_extension fp" } } */
 /* { dg-final { scan-assembler "\.arch_extension fp.dp" } } */
 /* { dg-final { scan-assembler-not "\.arch_extension dsp" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c 
b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c
index 405090ca9e9..11d4634a30f 100644
--- a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c
+++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c
@@ -1,6 +1,6 @@
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
-/* { dg-additional-options "-mcpu=cortex-m55+nodsp+nofp -mthumb 
-mfloat-abi=softfp -mfpu=auto --save-temps" } */
+/* { dg-additional-options "-march=unset -mcpu=cortex-m55+nodsp+nofp -mthumb 
-mfloat-abi=softfp -mfpu=auto --save-temps" } */
 /* { dg-final { scan-assembler-not "\.arch_extension fp" } } */
 /* { dg-final { scan-assembler-not "\.arch_extension fp.dp" } } */
 /* { dg-final { scan-assembler-not "\.arch_extension dsp" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c 
b/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c
index e0fb307ac30..bed231fc711 100644
--- a/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c
+++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c
@@ -1,6 +1,6 @@
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
-/* { dg-additional-options "-mcpu=cortex-m55+nofp -mthumb -mfloat-abi=hard 
-mfpu=auto --save-temps" } */
+/* { dg-additional-options "-march=unset -mcpu=cortex-m55+nofp -mthumb 
-mfloat-abi=hard -mfpu=auto --save-temps" } */
 /* { dg-final { scan-assembler "\.fpu softvfp" } } */
 /* { dg-final { scan-assembler "\.arch_extension mve" } } */
 /* { dg-final { scan-assembler "\.arch_extension dsp" } } */
diff --git a/gcc/testsuite/gcc.target/arm/

[PATCH 17/17] testsuite: arm: Use effective-target for pr96939 test

2024-11-19 Thread Torbjörn SVENSSON
Update test case to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/lto/pr96939_0.c: Use effective-target
arm_arch_v8a.
* gcc.target/arm/lto/pr96939_1.c: Remove dg-options.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/lto/pr96939_0.c | 4 ++--
 gcc/testsuite/gcc.target/arm/lto/pr96939_1.c | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/lto/pr96939_0.c 
b/gcc/testsuite/gcc.target/arm/lto/pr96939_0.c
index 241ffd5da0a..3bb74bd1a1d 100644
--- a/gcc/testsuite/gcc.target/arm/lto/pr96939_0.c
+++ b/gcc/testsuite/gcc.target/arm/lto/pr96939_0.c
@@ -1,7 +1,7 @@
 /* PR target/96939 */
 /* { dg-lto-do link } */
-/* { dg-require-effective-target arm_arch_v8a_ok } */
-/* { dg-lto-options { { -flto -O2 } } } */
+/* { dg-require-effective-target arm_arch_v8a_link } */
+/* { dg-lto-options { { -flto -O2 -mcpu=unset -march=armv8-a+simd+crc } } } */
 
 extern unsigned crc (unsigned, const void *);
 typedef unsigned (*fnptr) (unsigned, const void *);
diff --git a/gcc/testsuite/gcc.target/arm/lto/pr96939_1.c 
b/gcc/testsuite/gcc.target/arm/lto/pr96939_1.c
index 4afdbdaf5ad..c641b5580ab 100644
--- a/gcc/testsuite/gcc.target/arm/lto/pr96939_1.c
+++ b/gcc/testsuite/gcc.target/arm/lto/pr96939_1.c
@@ -1,5 +1,4 @@
 /* PR target/96939 */
-/* { dg-options "-march=armv8-a+simd+crc" } */
 
 #include 
 
-- 
2.25.1



[PATCH 11/17] testsuite: arm: Use effective-target for pr56184.C and pr59985.C

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* g++.dg/other/pr56184.C: Use effective-target
arm_arch_v7a_neon and arm_arch_v7a_thumb.
* g++.dg/other/pr59985.C: Use effective-target
arm_arch_v7a_neon and arm_arch_v7a_arm.
* lib/target-supports.exp: Define effective-target
arm_arch_v7a_thumb.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/g++.dg/other/pr56184.C  | 8 ++--
 gcc/testsuite/g++.dg/other/pr59985.C  | 7 +--
 gcc/testsuite/lib/target-supports.exp | 1 +
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/g++.dg/other/pr56184.C 
b/gcc/testsuite/g++.dg/other/pr56184.C
index dc949283c98..f4a4300c385 100644
--- a/gcc/testsuite/g++.dg/other/pr56184.C
+++ b/gcc/testsuite/g++.dg/other/pr56184.C
@@ -1,6 +1,10 @@
 /* { dg-do compile { target arm*-*-* } } */
-/* { dg-skip-if "incompatible options" { ! { arm_thumb1_ok || arm_thumb2_ok } 
} } */
-/* { dg-options "-fno-short-enums -O2 -mthumb -march=armv7-a -mfpu=neon 
-mfloat-abi=softfp -mtune=cortex-a9 -fno-section-anchors -Wno-return-type" } */
+/* { dg-require-effective-target arm_arch_v7a_neon_ok } */
+/* { dg-require-effective-target arm_arch_v7a_thumb_ok } */
+/* { dg-options "-fno-short-enums -O2 -fno-section-anchors -Wno-return-type" } 
*/
+/* { dg-add-options arm_arch_v7a_neon } */
+/* { dg-additional-options "-mthumb -mtune=cortex-a9" } */
+
 
 typedef unsigned int size_t;
 __extension__ typedef int __intptr_t;
diff --git a/gcc/testsuite/g++.dg/other/pr59985.C 
b/gcc/testsuite/g++.dg/other/pr59985.C
index 7c9bfab35f1..a0f5e184b43 100644
--- a/gcc/testsuite/g++.dg/other/pr59985.C
+++ b/gcc/testsuite/g++.dg/other/pr59985.C
@@ -1,7 +1,10 @@
 /* { dg-do compile { target arm*-*-* } } */
-/* { dg-skip-if "incompatible options" { arm_thumb1 } } */
-/* { dg-options "-g -fcompare-debug -O2 -march=armv7-a -mtune=cortex-a9 
-mfpu=vfpv3-d16 -mfloat-abi=hard" } */
 /* { dg-skip-if "need hardfp abi" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
+/* { dg-require-effective-target arm_arch_v7a_arm_ok } */
+/* { dg-require-effective-target arm_arch_v7a_neon_ok } */
+/* { dg-options "-g -fcompare-debug -O2" } */
+/* { dg-add-options arm_arch_v7a_neon } */
+/* { dg-additional-options "-marm -mtune=cortex-a9 -mfloat-abi=hard 
-mfpu=vfpv3-d16" } */
 
 extern void *f1 (unsigned long, unsigned long);
 extern const struct line_map *f2 (void *, int, unsigned int, const char *, 
unsigned int);
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 30e453a578a..6241c00a752 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5778,6 +5778,7 @@ foreach { armfunc armflag armdefs } {
v7a "-march=armv7-a+fp" __ARM_ARCH_7A__
v7a_arm "-march=armv7-a+fp -marm" "__ARM_ARCH_7A__ && !__thumb__"
v7a_neon "-march=armv7-a+simd -mfpu=auto -mfloat-abi=softfp" 
"__ARM_ARCH_7A__ && __ARM_NEON__"
+   v7a_thumb "-march=armv7-a+fp -mthumb" "__ARM_ARCH_7A__ && __thumb__"
v7r "-march=armv7-r+fp" __ARM_ARCH_7R__
v7m "-march=armv7-m -mthumb -mfloat-abi=soft" __ARM_ARCH_7M__
v7em "-march=armv7e-m+fp -mthumb" __ARM_ARCH_7EM__
-- 
2.25.1



RE: [PATCH]AArch64 Suppress default options when march or mcpu used is not affected by it.

2024-11-19 Thread Tamar Christina
> -Original Message-
> From: Andrew Pinski 
> Sent: Friday, November 15, 2024 7:16 PM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw
> ; ktkac...@gcc.gnu.org; Richard Sandiford
> 
> Subject: Re: [PATCH]AArch64 Suppress default options when march or mcpu used
> is not affected by it.
> 
> On Fri, Nov 15, 2024 at 6:30 AM Tamar Christina 
> wrote:
> >
> > Hi All,
> >
> > This patch makes it so that when you use any of the Cortex-A53 errata
> > workarounds but have specified an -march or -mcpu that we know is not
> > affected by it, we suppress the errata workaround.
> >
> > This is a driver only patch as the linker invocation needs to be changed as
> > well.  The linker and cc SPECs are different because for the linker we 
> > didn't
> > seem to add an inversion flag for the option.  That said, it's also not 
> > possible
> > to configure the linker with it on by default.  So not passing the flag is
> > sufficient to turn it off.
> >
> > For the compilers however we have an inversion flag using -mno-, which is
> needed
> > to disable the workarounds when the compiler has been configured with it by
> > default.
> >
> > Note that theoretically speaking -mcpu=native on a Cortex-A53 would turn it 
> > off,
> > but this should be ok because it's unlikely anyone is running GCC-15+ on a
> > Cortex-A53 which needs it.  If this is a concern, I can adjust the patch so
> > that, for targets that have HAVE_LOCAL_CPU_DETECT, a new custom function
> > re-queries host detection to see if it's an affected system.
> >
> > The workaround has the effect of suppressing certain inlining and 
> > multiply-add
> > formation which leads to about ~1% SPECCPU 2017 Intrate regression on
> modern
> > cores.  This patch is needed because most distros configure GCC with the
> > workaround enabled by default.
> >
> > I tried writing automated testcases for these, however the testsuite doesn't
> > want to scan the output of -### and it makes the excess error tests always 
> > fail
> > unless you use dg-error, which also looks for "error:".  So tested manually:
> 
> You might be able to use dg-message instead. dg-message does not look
> for a `note:` (dg-note), `error:` (dg-error) or `warning:`
> (dg-warning).
> 
> From gcc-dg.exp:
> ```
> # Look for messages that don't have standard prefixes.
> proc dg-message { args } {
> ```
> 

Thanks for the suggestion. I did try it before, but both dg-output and
dg-message fail the test for excess errors, since the -### output goes to
stderr.

But I realized I misunderstood the dg-message syntax and found a way to silence 
the
excess error. So respinning the patch.

Thanks,
Tamar
> Thanks,
> Andrew Pinski
> 
> >
> > > gcc -mcpu=neoverse-v1 -mfix-cortex-a53-835769 -xc - -O3 -o - < /dev/null 
> > > -###
> 2>&1 | grep "\-mfix" | wc -l
> > 0
> >
> > > gcc -mfix-cortex-a53-835769 -xc - -O3 -o - < /dev/null -### 2>&1 | grep 
> > > "\-
> mfix" | wc -l
> > 5
> >
> > > gcc -mfix-cortex-a53-835769 -march=armv8-a -xc - -O3 -o - < /dev/null -###
> 2>&1 | grep "\-mfix" | wc -l
> > 5
> >
> > > gcc -mfix-cortex-a53-835769 -march=armv8.1-a -xc - -O3 -o - < /dev/null 
> > > -###
> 2>&1 | grep "\-mfix" | wc -l
> > 0
> >
> > > gcc -mfix-cortex-a53-835769 -march=armv8.1-a -xc - -O3 -o - < /dev/null 
> > > -###
> 2>&1 | grep "\-\-fix" | wc -l
> > 0
> >
> > > gcc -mfix-cortex-a53-835769 -march=armv8-a -xc - -O3 -o - < /dev/null -###
> 2>&1 | grep "\-\-fix" | wc -l
> > 1
> >
> > > gcc -mfix-cortex-a53-835769 -xc - -O3 -o - < /dev/null -### 2>&1 | grep 
> > > "\-\-
> fix" | wc -l
> > 1
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64-errata.h (TARGET_SUPPRESS_OPT_SPEC,
> > TARGET_TURN_OFF_OPT_SPEC, CA53_ERR_835769_COMPILE_SPEC,
> > CA53_ERR_843419_COMPILE_SPEC): New.
> > (CA53_ERR_835769_SPEC, CA53_ERR_843419_SPEC): Use them.
> > (AARCH64_ERRATA_COMPILE_SPEC):
> > * config/aarch64/aarch64-elf-raw.h (CC1_SPEC, CC1PLUS_SPEC): Add
> > AARCH64_ERRATA_COMPILE_SPEC.
> > * config/aarch64/aarch64-freebsd.h (CC1_SPEC, CC1PLUS_SPEC): 
> > Likewise.
> > * config/aarch64/aarch64-gnu.h (CC1_SPEC, CC1PLUS_SPEC): Likewise.
> > * config/aarch64/aarch64-linux.h (CC1_SPEC, CC1PLUS_SPEC): Likewise.
> > * config/aarch64/aarch64-netbsd.h (CC1_SPEC, CC1PLUS_SPEC): 
> > Likewise.
> > * doc/invoke.texi: Document it.
> >
> > ---
> > diff --git a/gcc/config/aarch64/aarch64-elf-raw.h
> b/gcc/config/aarch64/aarch64-elf-raw.h
> > index
> 5396da9b2d626e23e4c4d56e19cd7aa70804c475..8442a664c4fdedd9696da90
> e6727293c4d472a3f 100644
> > --- a/gcc/config/aarch64/aarch64-elf-raw.h
> > +++ b/gcc/config/aarch64/aarch64-elf-raw.h
> > @@ -38,4 +38,12 @@
> >AARCH64_ERRATA_LINK_SPEC
> >  #endif
> >
> > +#ifndef CC1_SPEC
> > +# define CC1_SPEC AARCH64_ERRATA_COMPILE_SPEC
> > +#endif
> > +
> > +#i

[PATCH 12/17] testsuite: arm: Use -march=unset for bfloat16_scalar* tests

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/bfloat16_scalar_1_2.c: Added option
"-mcpu=unset".
* gcc.target/arm/bfloat16_scalar_2_1.c: Likewise.
* gcc.target/arm/bfloat16_scalar_2_2.c: Likewise.
* gcc.target/arm/bfloat16_scalar_3_1.c: Likewise.
* gcc.target/arm/bfloat16_scalar_3_2.c: Likewise.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c | 2 +-
 gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_1.c | 2 +-
 gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c | 2 +-
 gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_1.c | 2 +-
 gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c 
b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c
index 8293cafcc14..0d4c3ffec53 100644
--- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c
+++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c
@@ -1,7 +1,7 @@
 /* { dg-do assemble { target { arm*-*-* } } } */
 /* { dg-require-effective-target arm_v8_neon_ok } */
 /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
-/* { dg-additional-options "-march=armv8.2-a+bf16 -mfloat-abi=softfp 
-mfpu=auto" } */
+/* { dg-additional-options "-mcpu=unset -march=armv8.2-a+bf16 
-mfloat-abi=softfp -mfpu=auto" } */
 /* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_1.c 
b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_1.c
index e84f837e162..43c6ce0c1d3 100644
--- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_1.c
+++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_1.c
@@ -1,7 +1,7 @@
 /* { dg-do assemble { target { arm*-*-* } } } */
 /* { dg-require-effective-target arm_v8_neon_ok } */
 /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
-/* { dg-additional-options "-march=armv8.2-a -mfloat-abi=hard 
-mfpu=neon-fp-armv8" } */
+/* { dg-additional-options "-mcpu=unset -march=armv8.2-a -mfloat-abi=hard 
-mfpu=neon-fp-armv8" } */
 /* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c 
b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c
index 93ec059819a..64b584ea34c 100644
--- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c
+++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c
@@ -1,7 +1,7 @@
 /* { dg-do assemble { target { arm*-*-* } } } */
 /* { dg-require-effective-target arm_v8_neon_ok } */
 /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
-/* { dg-additional-options "-march=armv8.2-a -mfloat-abi=softfp 
-mfpu=neon-fp-armv8" } */
+/* { dg-additional-options "-mcpu=unset -march=armv8.2-a -mfloat-abi=softfp 
-mfpu=neon-fp-armv8" } */
 /* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_1.c 
b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_1.c
index a1a70690322..eb9baba9cd8 100644
--- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_1.c
+++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_1.c
@@ -1,7 +1,7 @@
 /* { dg-do assemble { target { arm*-*-* } } } */
 /* { dg-require-effective-target arm_v8_neon_ok } */
 /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
-/* { dg-additional-options "-march=armv8.2-a -mfloat-abi=hard 
-mfpu=neon-fp-armv8" } */
+/* { dg-additional-options "-mcpu=unset -march=armv8.2-a -mfloat-abi=hard 
-mfpu=neon-fp-armv8" } */
 /* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c 
b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c
index f49072613f0..74a74ec54fe 100644
--- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c
+++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c
@@ -1,7 +1,7 @@
 /* { dg-do assemble { target { arm*-*-* } } } */
 /* { dg-require-effective-target arm_v8_neon_ok } */
 /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
-/* { dg-additional-options "-march=armv8.2-a -mfloat-abi=softfp 
-mfpu=neon-fp-armv8" } */
+/* { dg-additional-options "-mcpu=unset -march=armv8.2-a -mfloat-abi=softfp 
-mfpu=neon-fp-armv8" } */
 /* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
-- 
2.25.1



Re: [PATCH] Add -f{,no-}assume-sane-operators-new-delete{,={0,1,2}} options [PR110137]

2024-11-19 Thread Jakub Jelinek
On Tue, Nov 19, 2024 at 10:25:16AM +0100, Richard Biener wrote:
> I think it's pretty clear and easy to describe to users what "m " and 
> what "mC" do.  But with "pure" this is an odd intermediate state.  For both
> "m " and "mP" you suggest above the new/delete might modify their
> global state but as you can't rely on the new/delete pair to prevail
> you cannot rely on the modification to happen.  But how do you explain
> that

If we are willing to make the default not strictly conforming (i.e.
basically revert PR101480 by default and make the GCC 11.1/11.2 behavior
the default and allow -fno-sane-operators-new-delete to change to GCC
11.3/14.* behavior), I can live with it.
But we need to make the documentation clear that the default is not strictly
conforming.

Jakub



[PATCH 03/17] testsuite: arm: Use effective-target for crc_hf_1.c test

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/acle/crc_hf_1.c: Use effective-target
arm_arch_v8a_hard and added option "-mcpu=unset".

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/acle/crc_hf_1.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/acle/crc_hf_1.c 
b/gcc/testsuite/gcc.target/arm/acle/crc_hf_1.c
index f1de2bdffee..378f5f04217 100644
--- a/gcc/testsuite/gcc.target/arm/acle/crc_hf_1.c
+++ b/gcc/testsuite/gcc.target/arm/acle/crc_hf_1.c
@@ -2,8 +2,9 @@
break CRC intrinsics.  */
 
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8a_hard_ok }  */
 /* { dg-require-effective-target arm_hard_vfp_ok }  */
-/* { dg-additional-options "-mfloat-abi=hard -march=armv8-a+simd+crc" } */
+/* { dg-additional-options "-mfloat-abi=hard -mcpu=unset 
-march=armv8-a+simd+crc" } */
 
 #include 
 
-- 
2.25.1



[PATCH 04/17] testsuite: arm: Use effective-target for pure-code/* tests

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pure-code/no-literal-pool-m0.c: Use
effective-target arm_cpu_cortex_m0.
* gcc.target/arm/pure-code/no-literal-pool-m23.c: Use
effective-target arm_cpu_cortex_m23.
* gcc.target/arm/pure-code/pr109800.c: Use effective-target
arm_arch_v7m and added option "-mcpu=unset".
* target-supports.exp: Define effective-target
arm_cpu_cortex_m0 and arm_cpu_cortex_m23.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c  | 5 +++--
 gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m23.c | 5 +++--
 gcc/testsuite/gcc.target/arm/pure-code/pr109800.c| 3 ++-
 gcc/testsuite/gcc.target/arm/pure-code/pr94538-1.c   | 5 +++--
 gcc/testsuite/lib/target-supports.exp| 2 ++
 5 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c 
b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
index bd6f4af183b..5bdbebb1a53 100644
--- a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
+++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
-/* { dg-skip-if "skip override" { *-*-* } { "-mfloat-abi=hard" } { "" } } */
-/* { dg-options "-mpure-code -mcpu=cortex-m0 -march=armv6s-m -mthumb 
-mfloat-abi=soft" } */
+/* { dg-require-effective-target arm_cpu_cortex_m0_ok } */
+/* { dg-options "-mpure-code" } */
+/* { dg-add-options arm_cpu_cortex_m0 }*/
 /* { dg-final { check-function-bodies "**" "" } } */
 
 /* Does not use thumb1_gen_const_int.
diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m23.c 
b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m23.c
index 95370126ce8..80a6b51138b 100644
--- a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m23.c
+++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m23.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
-/* { dg-skip-if "skip override" { *-*-* } { "-mfloat-abi=hard" } { "" } } */
-/* { dg-options "-mpure-code -mcpu=cortex-m23 -march=armv8-m.base -mthumb 
-mfloat-abi=soft" } */
+/* { dg-require-effective-target arm_cpu_cortex_m23_ok } */
+/* { dg-options "-mpure-code" } */
+/* { dg-add-options arm_cpu_cortex_m23 } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
 /*
diff --git a/gcc/testsuite/gcc.target/arm/pure-code/pr109800.c 
b/gcc/testsuite/gcc.target/arm/pure-code/pr109800.c
index d797b790232..ace37cd6bc9 100644
--- a/gcc/testsuite/gcc.target/arm/pure-code/pr109800.c
+++ b/gcc/testsuite/gcc.target/arm/pure-code/pr109800.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v7m_link } */
 /* { dg-require-effective-target arm_hard_ok } */
-/* { dg-options "-O2 -march=armv7-m -mfloat-abi=hard -mfpu=fpv4-sp-d16 
-mbig-endian -mpure-code" } */
+/* { dg-options "-O2 -mcpu=unset -march=armv7-m -mfloat-abi=hard 
-mfpu=fpv4-sp-d16 -mbig-endian -mpure-code" } */
 double f() { return 5.0; }
diff --git a/gcc/testsuite/gcc.target/arm/pure-code/pr94538-1.c 
b/gcc/testsuite/gcc.target/arm/pure-code/pr94538-1.c
index 31061d5d445..68c223fbd15 100644
--- a/gcc/testsuite/gcc.target/arm/pure-code/pr94538-1.c
+++ b/gcc/testsuite/gcc.target/arm/pure-code/pr94538-1.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
-/* { dg-skip-if "skip override" { *-*-* } { "-mfloat-abi=hard" } { "" } } */
-/* { dg-options "-mpure-code -mcpu=cortex-m23 -march=armv8-m.base -mthumb 
-mfloat-abi=soft" } */
+/* { dg-require-effective-target arm_cpu_cortex_m23_ok } */
+/* { dg-options "-mpure-code" } */
+/* { dg-add-options arm_cpu_cortex_m23 } */
 
 typedef int __attribute__ ((__vector_size__ (16))) V;
 
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 01ed55ed82f..d973b1863bd 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5848,6 +5848,8 @@ foreach { armfunc armflag armdefs } {
 foreach { armfunc armflag armdefs } {
xscale_arm "-mcpu=xscale -mfloat-abi=soft -marm" "__XSCALE__ && 
!__thumb__"
cortex_a57 "-mcpu=cortex-a57" __ARM_ARCH_8A__
+   cortex_m0 "-mcpu=cortex-m0 -mfloat-abi=soft -mthumb" 
"__ARM_ARCH_6M__ && __thumb__"
+   cortex_m23 "-mcpu=cortex-m23 -mfloat-abi=soft -mthumb" 
"__ARM_ARCH_8M_BASE__  && __thumb__"
} {
 eval [string map [list FUNC $armfunc FLAG $armflag DEFS $armdefs ] {
proc check_effective_target_arm_cpu_FUNC_ok { } {
-- 
2.25.1



[PATCH 10/17] testsuite: arm: Use effective target for pr57735.C test

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* g++.dg/ext/pr57735.C: Use effective-target arm_cpu_xscale_arm.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/g++.dg/ext/pr57735.C | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/g++.dg/ext/pr57735.C 
b/gcc/testsuite/g++.dg/ext/pr57735.C
index d9fc9e4aa5e..256a57ad0e2 100644
--- a/gcc/testsuite/g++.dg/ext/pr57735.C
+++ b/gcc/testsuite/g++.dg/ext/pr57735.C
@@ -1,8 +1,7 @@
 /* { dg-do compile { target arm*-*-* } } */
-/* { dg-require-effective-target arm_arch_v5te_ok } */
-/* { dg-require-effective-target arm_arm_ok } */
-/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } 
{"-mfloat-abi=soft" } } */
-/* { dg-options "-march=armv5te -marm  -mtune=xscale -mfloat-abi=soft -O1 
-Wno-return-type" } */
+/* { dg-require-effective-target arm_cpu_xscale_arm_ok } */
+/* { dg-options "-O1 -Wno-return-type" } */
+/* { dg-add-options arm_cpu_xscale_arm } */
 
 typedef unsigned int size_t;
 __extension__
-- 
2.25.1



[PATCH 15/17] testsuite: arm: Use -mcpu=unset when overriding -march

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:
* gcc.dg/pr41574.c: Added option "-mcpu=unset".
* gcc.dg/pr59418.c: Likewise.
* lib/target-supports.exp (add_options_for_vect_early_break):
Likewise.
(add_options_for_arm_v8_neon): Likewise.
(check_effective_target_arm_neon_ok_nocache): Likewise.
(check_effective_target_arm_simd32_ok_nocache): Likewise.
(check_effective_target_arm_sat_ok_nocache): Likewise.
(check_effective_target_arm_dsp_ok_nocache): Likewise.
(check_effective_target_arm_crc_ok_nocache): Likewise.
(add_options_for_aarch64_sve): Likewise.
(check_effective_target_arm_v8_neon_ok_nocache): Likewise.
(check_effective_target_aarch64_fjcvtzs_hw): Likewise.
(check_effective_target_arm_v8_1m_mve_fp_ok_nocache): Likewise.
(check_effective_target_arm_v8_1a_neon_ok_nocache): Likewise.
(check_effective_target_arm_v8_2a_fp16_scalar_ok_nocache):
Likewise.
(check_effective_target_arm_v8_2a_fp16_neon_ok_nocache):
Likewise.
(check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache):
Likewise.
(check_effective_target_arm_v8_1m_mve_ok_nocache): Likewise.
(check_effective_target_arm_v8_2a_i8mm_ok_nocache): Likewise.
(check_effective_target_arm_fp16fml_neon_ok_nocache): Likewise.
(check_effective_target_arm_v8_2a_bf16_neon_ok_nocache):
Likewise.
(check_effective_target_arm_v8m_main_cde_ok_nocache): Likewise.
(check_effective_target_arm_v8m_main_cde_fp_ok_nocache):
Likewise.
(check_effective_target_arm_v8_1m_main_cde_mve_ok_nocache):
Likewise.
(check_effective_target_arm_v8_1m_main_cde_mve_fp_ok_nocache):
Likewise.
(check_effective_target_aarch64_asm_fp_ok): Likewise.
(check_effective_target_aarch64_asm_simd_ok): Likewise.
(check_effective_target_aarch64_asm_crypto_ok): Likewise.
(check_effective_target_aarch64_asm_crc_ok): Likewise.
(check_effective_target_aarch64_asm_lse_ok): Likewise.
(check_effective_target_aarch64_asm_dotprod_ok): Likewise.
(check_effective_target_aarch64_asm_sve_ok): Likewise.
(check_effective_target_aarch64_asm_i8mm_ok): Likewise.
(check_effective_target_aarch64_asm_f32mm_ok): Likewise.
(check_effective_target_aarch64_asm_f64mm_ok): Likewise.
(check_effective_target_aarch64_asm_bf16_ok): Likewise.
(check_effective_target_aarch64_asm_sb_ok): Likewise.
(check_effective_target_aarch64_asm_sve2_ok): Likewise.
(check_effective_target_aarch64_asm_ls64_ok): Likewise.
(check_effective_target_aarch64_asm_sme_ok): Likewise.
(check_effective_target_aarch64_asm_sme-i16i64_ok): Likewise.
(check_effective_target_aarch64_asm_sme2_ok): Likewise.
(check_effective_target_arm_v8_3a_complex_neon_ok_nocache):
Likewise.
(check_effective_target_arm_v8_3a_fp16_complex_neon_ok_nocache):
Likewise.
(check_effective_target_arm_v8_1_lob_ok): Likewise.
---
 gcc/testsuite/gcc.dg/pr41574.c|  2 +-
 gcc/testsuite/gcc.dg/pr59418.c|  2 +-
 gcc/testsuite/lib/target-supports.exp | 66 +--
 3 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr41574.c b/gcc/testsuite/gcc.dg/pr41574.c
index 062c0044532..e25295bc4fd 100644
--- a/gcc/testsuite/gcc.dg/pr41574.c
+++ b/gcc/testsuite/gcc.dg/pr41574.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=armv7-a -mfloat-abi=softfp -mfpu=neon 
-fno-unsafe-math-optimizations -fdump-rtl-combine" { target { arm*-*-* } } } */
+/* { dg-options "-O2 -mcpu=unset -march=armv7-a -mfloat-abi=softfp -mfpu=neon 
-fno-unsafe-math-optimizations -fdump-rtl-combine" { target { arm*-*-* } } } */
 /* { dg-options "-O2 -fno-unsafe-math-optimizations -fdump-rtl-combine" { 
target { ! arm*-*-* } } } */
 
 
diff --git a/gcc/testsuite/gcc.dg/pr59418.c b/gcc/testsuite/gcc.dg/pr59418.c
index 4b54ef2b42d..6ab46ecde8a 100644
--- a/gcc/testsuite/gcc.dg/pr59418.c
+++ b/gcc/testsuite/gcc.dg/pr59418.c
@@ -3,7 +3,7 @@
 
 /* { dg-do compile } */
 /* { dg-options "-Os -g" } */
-/* { dg-options "-march=armv7-a+fp -mfloat-abi=hard -Os -g" { target { 
arm*-*-* && { ! arm_thumb1 } } } } */
+/* { dg-options "-mcpu=unset -march=armv7-a+fp -mfloat-abi=hard -Os -g" { 
target { arm*-*-* && { ! arm_thumb1 } } } } */
 
 extern int printf (const char *__format, ...);
 double bar (const char *, int);
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 6241c00a752..54c7af5dae4 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4351,7 +4351,7 @@ proc add_options_for_vect_early_break { flags } {
 
 if { [check_effective_target_arm_v8_neon_ok] } {
   

[PATCH 09/17] testsuite: arm: Use effective-target for nomve_fp_1.c test

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* g++.target/arm/mve/general-c++/nomve_fp_1.c: Added option
"-mcpu=unset".

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c 
b/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c
index a2069d353cf..fd8c05b0eed 100644
--- a/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c
+++ b/gcc/testsuite/g++.target/arm/mve/general-c++/nomve_fp_1.c
@@ -4,7 +4,7 @@
 /* Do not use dg-add-options arm_v8_1m_mve, because this might expand to "",
which could imply mve+fp depending on the user settings. We want to make
sure the '+fp' extension is not enabled.  */
-/* { dg-options "-mfpu=auto -march=armv8.1-m.main+mve" } */
+/* { dg-options "-mfpu=auto -mcpu=unset -march=armv8.1-m.main+mve" } */
 /* { dg-add-options arm_fp } */
 
 #include 
-- 
2.25.1



[PATCH 07/17] testsuite: arm: Use effective-target for {gcc, g++}.target/arm/ tests

2024-11-19 Thread Torbjörn SVENSSON
Update test cases to use -mcpu=unset/-march=unset feature introduced in
r15-3606-g7d6c6a0d15c.

gcc/testsuite/ChangeLog:

* g++.target/arm/pr103676.C: Use effective-target
arm_cpu_cortex_m7.
* gcc.target/arm/no-volatile-in-it.c: Likewise.
* gcc.target/arm/fma-sp.c: Use effective-target
arm_cpu_cortex_m4.
* gcc.target/arm/pr53859.c: Likewise.
* gcc.target/arm/mve/intrinsics/pr97327.c: Use effective-target
arm_cpu_cortex_m55.
* gcc.target/arm/pr65067.c: Use effective-target
arm_cpu_cortex_m3.
* lib/target-supports.exp: Define effective-target
arm_cpu_cortex_m3, arm_cpu_cortex_m4, arm_cpu_cortex_m7 and
arm_cpu_cortex_m55.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/g++.target/arm/pr103676.C   | 5 -
 gcc/testsuite/gcc.target/arm/fma-sp.c | 8 +++-
 gcc/testsuite/gcc.target/arm/mve/intrinsics/pr97327.c | 7 ---
 gcc/testsuite/gcc.target/arm/no-volatile-in-it.c  | 5 +++--
 gcc/testsuite/gcc.target/arm/pr53859.c| 4 +++-
 gcc/testsuite/gcc.target/arm/pr65067.c| 4 +++-
 gcc/testsuite/lib/target-supports.exp | 4 
 7 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/gcc/testsuite/g++.target/arm/pr103676.C 
b/gcc/testsuite/g++.target/arm/pr103676.C
index 1607564ff5d..1e5711626f8 100644
--- a/gcc/testsuite/g++.target/arm/pr103676.C
+++ b/gcc/testsuite/g++.target/arm/pr103676.C
@@ -1,6 +1,9 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_cpu_cortex_m7_ok } */
 /* { dg-require-effective-target arm_thumb1_ok } */
-/* { dg-additional-options "-mcpu=cortex-m7 -mthumb -O2" }  */
+/* { dg-additional-options "-O2" }  */
+/* { dg-add-options arm_cpu_cortex_m7 } */
+
 
 typedef unsigned long long uint64_t;
 struct timer {
diff --git a/gcc/testsuite/gcc.target/arm/fma-sp.c 
b/gcc/testsuite/gcc.target/arm/fma-sp.c
index e1884545f0d..539628d5d26 100644
--- a/gcc/testsuite/gcc.target/arm/fma-sp.c
+++ b/gcc/testsuite/gcc.target/arm/fma-sp.c
@@ -1,9 +1,7 @@
 /* { dg-do compile } */
-/* { dg-skip-if "avoid conflicts with multilib options" { ! arm_thumb2_ok } { 
"-march=*" } { "" } } */
-/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { "-mcpu=*" 
} { "-mcpu=cortex-m4" } } */
-/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { "-mfpu=*" 
} { "-mfpu=fpv4-sp-d16" } } */
-/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { 
"-mfloat-abi=*" } { "-mfloat-abi=hard" } } */
-/* { dg-options "-O2 -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mthumb 
-mfloat-abi=hard" } */
+/* { dg-require-effective-target arm_cpu_cortex_m4_ok } */
+/* { dg-options "-O2 -mfpu=fpv4-sp-d16 -mfloat-abi=hard" } */
+/* { dg-add-options arm_cpu_cortex_m4 } */
 
 #include "fma.h"
 
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr97327.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr97327.c
index d19bde59266..34b7af6022b 100644
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr97327.c
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr97327.c
@@ -1,6 +1,7 @@
-/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
-/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=hard" } { "" 
} } */
-/* { dg-additional-options "-mcpu=cortex-m55 -mthumb -mfloat-abi=soft 
-mfpu=auto -Werror" } */
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_cpu_cortex_m55_ok } */
+/* { dg-additional-options "-mfloat-abi=soft -mfpu=auto -Werror" } */
+/* { dg-add-options arm_cpu_cortex_m55 } */
 
 int main ()
 {
diff --git a/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c 
b/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c
index 6f3664d3b3d..119e9ecf321 100644
--- a/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c
+++ b/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c
@@ -1,7 +1,8 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_cpu_cortex_m7_ok } */
 /* { dg-require-effective-target arm_thumb2_ok } */
-/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-march=*" "-mcpu=*" } { 
"-mcpu=cortex-m7" } } */
-/* { dg-options "-Os -mthumb -mcpu=cortex-m7" } */
+/* { dg-options "-Os" } */
+/* { dg-add-options arm_cpu_cortex_m7 } */
 
 int
 foo (int a, int b, volatile int *c, volatile int *d)
diff --git a/gcc/testsuite/gcc.target/arm/pr53859.c 
b/gcc/testsuite/gcc.target/arm/pr53859.c
index 003489e0bb7..46ae40b67fa 100644
--- a/gcc/testsuite/gcc.target/arm/pr53859.c
+++ b/gcc/testsuite/gcc.target/arm/pr53859.c
@@ -1,7 +1,9 @@
 /* PR target/53859 */
 /* { dg-do compile } */
+/* { dg-require-effective-target arm_cpu_cortex_m4_ok } */
 /* { dg-require-effective-target arm_thumb2_ok } */
-/* { dg-options "-mcpu=cortex-m4 -mthumb -O2" } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_cpu_cortex_m4 } */
 
 void bar (int,int,char* ,int);
 
diff --git a/gcc/testsuite/gcc.target/arm/pr65067.c 
b/gcc/testsuite/gcc.target/arm/pr65067.c
index 05da29483f6..a2ec7450c0b 10064

Re: [PATCH] Add -f{,no-}assume-sane-operators-new-delete{,={0,1,2}} options [PR110137]

2024-11-19 Thread Jan Hubicka
> On Tue, Nov 19, 2024 at 10:25:16AM +0100, Richard Biener wrote:
> > I think it's pretty clear and easy to describe to users what "m " and 
> > what "mC" do.  But with "pure" this is an odd intermediate state.  For both
> > "m " and "mP" you suggest above the new/delete might modify their
> > global state but as you can't rely on the new/delete pair to prevail
> > you cannot rely on the modification to happen.  But how do you explain
> > that
> 
> If we are willing to make the default not strictly conforming (i.e.
> basically revert PR101480 by default and make the GCC 11.1/11.2 behavior
> the default and allow -fno-sane-operators-new-delete to change to GCC
> 11.3/14.* behavior), I can live with it.
> But we need to make the documentation clear that the default is not strictly
> conforming.
It is not clear to me that "mP" means strict conformance.
In my understanding the standard allows new/delete operators to be extended
to update some stats structure which records e.g. the amount of allocated
memory.  Its value will depend on optimization - if we optimize out a
new/delete pair, the memory will not be recorded, which is still a meaningful
use case.

Now the extended new/delete may be invisible to the compiler while the
compiler-visible code may have other means of allocation (like our ggc)
and update the same stats too.  With mP this may lead to a situation where
the compiler, optimizing the visible code, hoists loads from this structure
across the new call (since it is mP and we assume it does not modify it) and
later stores a value missing its update, which can lead to an inconsistent
state of the structure.

so I would say that both mC and mP are (slightly?) non-conforming.
Honza


Re: [PATCH] Add -f{,no-}assume-sane-operators-new-delete{,={0,1,2}} options [PR110137]

2024-11-19 Thread Jakub Jelinek
On Tue, Nov 19, 2024 at 09:35:06AM +0100, Richard Biener wrote:
> > This all seems excessively complicated; can't it be simplified a bit?
> 
> I'd like to second this - I don't see value in adding the "intermediate"
> state which we never had.  We for quite some time had two extremes, if
> we want to have a separate flag controlling this besides -fallocation-dce
> the go with dual-state please.  I'd even say we don't need any new
> flag but can use the -fallocation-dce umbrella to control how the
> middle-end interprets new/delete semantically since the ability to
> elide new/delete implies that the user at least doesn't care about
> side-effects.  Documenting the behavior clearly is of course good.

The problem with the two states we had/have is that they are too extreme.

Our old one (i.e. those "mC" etc.) is too strong and doesn't have any
backup in the standard, I think the PR101480 testcase is perfectly
valid C++ and so it probably isn't a good idea to enable such behavior
by default; with an extra switch sure, user promises not to rely on it
and the optimization can be done.

The current one (i.e. those "m " etc.) is too weak, otherwise we wouldn't
be discussing this PR; it really penalizes too much code in the wild.

The proposed intermediate one is what LLVM clearly implements and as I wrote
my reading of the standard is that while the user replaced global operators
certainly can modify visible global state, the callers can't really rely
on it because there is no guarantee it will be called at all and so I hope
we can do that by default.

I think the step from no optimizations to the intermediate state is
much more important for C++ code in the wild: by making the operators have
pure-ish behavior one doesn't need to reread everything from memory.  Typical
C++ has data in classes, many of those escape somewhere, and having to reread
everything after every new/delete operator just in case the operators have
changed it is expensive.

The step from the intermediate state to the full optimization one is primarily
about DSE of global stores before the operators, but I think the most
important of those (stores to the memory actually being deleted) we can
already delete anyway if it is a call from delete, because there are
destructors happening before that and so the operators can't expect
some particular values in that memory.

Anyway, I wonder what Jason/Jonathan think about this from the C++ standard
POV.

Jakub



testsuite: m68k: Fix tests for C23

2024-11-19 Thread Andreas Schwab
* gcc.target/m68k/crash1.c (seq_printf): Add prototype.
* gcc.target/m68k/pr63347.c (oof): Add missing parameter.
---
 gcc/testsuite/gcc.target/m68k/crash1.c  | 2 +-
 gcc/testsuite/gcc.target/m68k/pr63347.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/m68k/crash1.c 
b/gcc/testsuite/gcc.target/m68k/crash1.c
index 2554d62cedd..9188e99b98a 100644
--- a/gcc/testsuite/gcc.target/m68k/crash1.c
+++ b/gcc/testsuite/gcc.target/m68k/crash1.c
@@ -15,7 +15,7 @@ struct kernel_stat
  unsigned irqs[256];
 };
 extern struct kernel_stat per_cpu__kstat;
-void seq_printf ();
+void seq_printf (...);
 
 void show_stat(void)
 {
diff --git a/gcc/testsuite/gcc.target/m68k/pr63347.c 
b/gcc/testsuite/gcc.target/m68k/pr63347.c
index b817f4694f3..d413b3f8655 100644
--- a/gcc/testsuite/gcc.target/m68k/pr63347.c
+++ b/gcc/testsuite/gcc.target/m68k/pr63347.c
@@ -2,7 +2,7 @@
 /* { dg-options "-O2 -mcpu=5208 -w" } */
 
 void __attribute__ ((noinline))
-oof()
+oof(const char *s)
 {
   asm volatile ("" ::: "memory");
 }
-- 
2.47.0


-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [PATCH] Add -f{,no-}assume-sane-operators-new-delete{,={0,1,2}} options [PR110137]

2024-11-19 Thread Richard Biener
On Tue, 19 Nov 2024, Jakub Jelinek wrote:

> On Tue, Nov 19, 2024 at 09:35:06AM +0100, Richard Biener wrote:
> > > This all seems excessively complicated; can't it be simplified a bit?
> > 
> > I'd like to second this - I don't see value in adding the "intermediate"
> > state which we never had.  We for quite some time had two extremes, if
> > we want to have a separate flag controlling this besides -fallocation-dce
> > the go with dual-state please.  I'd even say we don't need any new
> > flag but can use the -fallocation-dce umbrella to control how the
> > middle-end interprets new/delete semantically since the ability to
> > elide new/delete implies that the user at least doesn't care about
> > side-effects.  Documenting the behavior clearly is of course good.
> 
> The problem with the two states we had/have is that they are too extreme.
> 
> Our old one (i.e. those "mC" etc.) is too strong and doesn't have any
> backup in the standard, I think the PR101480 testcase is perfectly
> valid C++ and so it probably isn't a good idea to enable such behavior
> by default; with an extra switch sure, user promises not to rely on it
> and the optimization can be done.
> 
> The current one (i.e. those "m " etc.) are too weak, otherwise we wouldn't
> be discussing this PR, it really penalizes too much code in the wild.
> 
> The proposed intermediate one is what LLVM clearly implements and as I wrote
> my reading of the standard is that while the user replaced global operators
> certainly can modify visible global state, the callers can't really rely
> on it because there is no guarantee it will be called at all and so I hope
> we can do that by default.
> 
> I think the step from the no optimizations to the intermediate state is
> much more important for C++ code in the wild, by making the operators have
> pure-ish behavior one doesn't need to reread everything from memory, typical
> C++ has data in classes, many of those escape somewhere and having to reread
> everything after every new/delete operator just in case those have changed
> those is expensive.
> 
> The step from intermediate state to the full optimization one is primarily
> about DSE of global stores before the operators, but I think the most
> important from those (stores to the memory actually being deleted) we can
> already delete anyway if it is call from delete because there are
> destructors happening before that and so the operators can't expect
> some particular values in that memory.
> 
> Anyway, I wonder what Jason/Jonathan think about this from the C++ standard
> POV.

I think it's pretty clear and easy to describe to users what "m " and 
what "mC" do.  But with "pure" this is an odd intermediate state.  For both
"m " and "mP" you suggest above the new/delete might modify their
global state but as you can't rely on the new/delete pair to prevail
you cannot rely on the modification to happen.  But how do you explain
that

 state = 1;
 p = new X;
 delete p;
 cout << state;

"works" but

 cout << state;
 p = new X;
 cout << state;
 delete p;
 cout << state;

behaves differently from

 p = new X;
 cout << state;
 delete p;
 cout << state;

when the allocation is not elided and using "mP"?  I think the
actual behavior with "mP" is "weird"; actual new/delete operators that really
just inspect global memory but do not modify it are not going to be
important.

Richard.

>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] Add -f{,no-}assume-sane-operators-new-delete{,={0,1,2}} options [PR110137]

2024-11-19 Thread Richard Biener
On Tue, 19 Nov 2024, Jan Hubicka wrote:

> > 
> > The problem with the two states we had/have is that they are too extreme.
> > 
> > Our old one (i.e. those "mC" etc.) is too strong and doesn't have any
> > backup in the standard, I think the PR101480 testcase is perfectly
> > valid C++ and so it probably isn't a good idea to enable such behavior
> > by default; with an extra switch sure, user promises not to rely on it
> > and the optimization can be done.
> > 
> > The current one (i.e. those "m " etc.) are too weak, otherwise we wouldn't
> > be discussing this PR, it really penalizes too much code in the wild.
> 
> I have WIP patch (which needs polishing for next stage 1) to extend
> modref to detect functions doing allocations that does not escape to
> global state as pure/const since allocation+deallocation should not be
> observable.

I think that's the wrong way around - what the C++ standard seems to
imply is that we should be able to elide the allocation if the
allocated memory is unused and we are allowed to ignore side-effects
on global memory.  But iff the allocation takes place the side-effects
on global memory are of course visible.

That is, in DCE terms, global memory use/def should not make
the allocation/deallocation necessary.  In practice this will
then still need two DCE runs to optimize things fully, the first
can elide the allocation but only the second can elide things
made necessary by the allocation functions.

IIRC that's what is implemented right now, correct?

For the allocation to not be barriers for other optimization there's
no guarantee from the C++ standard and so we can add another flag.
But allocation functions only reading but not storing global memory
and users caring sounds like not going to be common to be worth
optimizing for.

Richard.


Re: [PATCH] Add -f{,no-}assume-sane-operators-new-delete{,={0,1,2}} options [PR110137]

2024-11-19 Thread Jan Hubicka
> 
> The problem with the two states we had/have is that they are too extreme.
> 
> Our old one (i.e. those "mC" etc.) is too strong and doesn't have any
> backup in the standard, I think the PR101480 testcase is perfectly
> valid C++ and so it probably isn't a good idea to enable such behavior
> by default; with an extra switch sure, user promises not to rely on it
> and the optimization can be done.
> 
> The current one (i.e. those "m " etc.) are too weak, otherwise we wouldn't
> be discussing this PR, it really penalizes too much code in the wild.

I have a WIP patch (which needs polishing for next stage 1) to extend
modref to detect functions doing allocations that do not escape to
global state as pure/const, since allocation+deallocation should not be
observable.

Operator new with "mP" adds an extra twist here.  We need to assume that
the function reads global state (so everything globally accessible needs to
be stored to memory prior to the call) but its observable side-effects after
return do not depend on this.

So having "mP" default is extra complication here.  Not terribly bad -
we ignore the loads for value numbering and use them for DSE/DCE.

Honza


Re: [PATCH] Add -f{,no-}assume-sane-operators-new-delete{,={0,1,2}} options [PR110137]

2024-11-19 Thread Jan Hubicka
> On Tue, 19 Nov 2024, Jan Hubicka wrote:
> 
> > > 
> > > The problem with the two states we had/have is that they are too extreme.
> > > 
> > > Our old one (i.e. those "mC" etc.) is too strong and doesn't have any
> > > backup in the standard, I think the PR101480 testcase is perfectly
> > > valid C++ and so it probably isn't a good idea to enable such behavior
> > > by default; with an extra switch sure, user promises not to rely on it
> > > and the optimization can be done.
> > > 
> > > The current one (i.e. those "m " etc.) are too weak, otherwise we wouldn't
> > > be discussing this PR, it really penalizes too much code in the wild.
> > 
> > I have WIP patch (which needs polishing for next stage 1) to extend
> > modref to detect functions doing allocations that does not escape to
> > global state as pure/const since allocation+deallocation should not be
> > observable.
> 
> I think that's the wrong way around - what the C++ standard seems to
> imply is that we should be able to elide the allocation if the
> allocated memory is unused and we are allowed to ignore side-effects
> on global memory.  But iff the allocation takes place the side-effects
> on global memory are of course visible.

If we allow a function that does allocation+deallocation to be pure
and later optimize out the call to it since we know that everything it
computes is dead or redundant, it is the same as inlining it, optimizing out
everything it calculates (since it is dead or redundant) and then
removing the allocation pair as dead.

So it should be safe with wording of the standard + the extra assumption
that new/delete is const/pure in fnspec way (no side effects on memory)
as done by Jakub's patch. If we expect new/delete to clobber global
memory but still be optimizable out (our current default) the
transformation would be still possible, but needs extra logic to handle
"if executed it may change something but doing so is optional".

Honza
> 
> That is, in DCE terms, global memory use/def should not make
> the allocation/deallocation necessary.  In practice this will
> then still need two DCE runs to optimize things fully, the first
> can elide the allocation but only the second can elide things
> made necessary by the allocation functions.
> 
> IIRC that's what is implemented right now, correct?
> 
> For the allocation to not be barriers for other optimization there's
> no guarantee from the C++ standard and so we can add another flag.
> But allocation functions only reading but not storing global memory
> and users caring sounds like not going to be common to be worth
> optimizing for.
> 
> Richard.


Re: [PATCH] Add -f{,no-}assume-sane-operators-new-delete{,={0,1,2}} options [PR110137]

2024-11-19 Thread Jan Hubicka
> > On Tue, 19 Nov 2024, Jan Hubicka wrote:
> > 
> > > > 
> > > > The problem with the two states we had/have is that they are too 
> > > > extreme.
> > > > 
> > > > Our old one (i.e. those "mC" etc.) is too strong and doesn't have any
> > > > backup in the standard, I think the PR101480 testcase is perfectly
> > > > valid C++ and so it probably isn't a good idea to enable such behavior
> > > > by default; with an extra switch sure, user promises not to rely on it
> > > > and the optimization can be done.
> > > > 
> > > > The current one (i.e. those "m " etc.) are too weak, otherwise we 
> > > > wouldn't
> > > > be discussing this PR, it really penalizes too much code in the wild.
> > > 
> > > I have WIP patch (which needs polishing for next stage 1) to extend
> > > modref to detect functions doing allocations that does not escape to
> > > global state as pure/const since allocation+deallocation should not be
> > > observable.
> > 
> > I think that's the wrong way around - what the C++ standard seems to
> > imply is that we should be able to elide the allocation if the
> > allocated memory is unused and we are allowed to ignore side-effects
> > on global memory.  But iff the allocation takes place the side-effects
> > on global memory are of course visible.
> 
> If we allow function that does allocation+deallocation to be pure
> and later out call to it since we know that eveyrhing it computes is
   ^ optimize 
> dead or redundant, it is the same as inlining it, optimizing out
> everything it calculates (since it is dead or redundant) and then
> removing the allocatin pair as dead.
> 
> So it should be safe with wording of the standard + the extra assumption
> that new/delete is const/pure in fnspec way (no side effects on memory)
> as done by Jakub's patch. If we expect new/delete to clobber global
> memory but still be optimizable out (our current default) the
> transformation would be still possible, but needs extra logic to handle
> "if executed it may change something but doing so is optional".
> 
> Honza


Re: [PATCH] expand: Fix up ICE on VCE from _Complex types to _BitInt [PR117458]

2024-11-19 Thread Richard Biener
On Tue, 19 Nov 2024, Jakub Jelinek wrote:

> Hi!
> 
> extract_bit_field can't handle extraction of non-mode precision
> from complex mode operands which don't live in memory, e.g. gen_lowpart
> crashes on those.
> The following patch in that case defers the extract_bit_field call
> until op0 is forced into memory.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> 2024-11-19  Jakub Jelinek  
> 
>   PR middle-end/117458
>   * expr.cc (expand_expr_real_1) : Don't
>   call extract_bit_field if op0 has complex mode and isn't a MEM,
>   instead first force op0 into memory and then call extract_bit_field.
> 
>   * gcc.dg/bitint-116.c: New test.
> 
> --- gcc/expr.cc.jj2024-11-08 12:38:10.684495739 +0100
> +++ gcc/expr.cc   2024-11-18 18:35:47.259349847 +0100
> @@ -12505,7 +12505,9 @@ expand_expr_real_1 (tree exp, rtx target
>   op0 = convert_modes (mode, GET_MODE (op0), op0,
>TYPE_UNSIGNED (TREE_TYPE (treeop0)));
>/* If the output type is a bit-field type, do an extraction.  */
> -  else if (reduce_bit_field && mode != BLKmode)
> +  else if (reduce_bit_field
> +&& mode != BLKmode
> +&& (MEM_P (op0) || !COMPLEX_MODE_P (GET_MODE (op0
>   return extract_bit_field (op0, TYPE_PRECISION (type), 0,
> TYPE_UNSIGNED (type), NULL_RTX,
> mode, mode, false, NULL);
> @@ -12529,6 +12531,11 @@ expand_expr_real_1 (tree exp, rtx target
>  
> emit_move_insn (target, op0);
> op0 = target;
> +
> +   if (reduce_bit_field && mode != BLKmode)
> + return extract_bit_field (op0, TYPE_PRECISION (type), 0,
> +   TYPE_UNSIGNED (type), NULL_RTX,
> +   mode, mode, false, NULL);
>   }
>  
>/* If OP0 is (now) a MEM, we need to deal with alignment issues.  If 
> the
> --- gcc/testsuite/gcc.dg/bitint-116.c.jj  2024-11-18 18:50:05.322348174 
> +0100
> +++ gcc/testsuite/gcc.dg/bitint-116.c 2024-11-18 18:50:57.785614318 +0100
> @@ -0,0 +1,11 @@
> +/* PR middle-end/117458 */
> +/* { dg-do compile { target bitint } } */
> +/* { dg-options "-std=c23 -O2" } */
> +
> +typedef _BitInt(33) B __attribute__((may_alias));
> +
> +_BitInt(33)
> +foo (_Complex float x)
> +{
> +  return *(B *)&x;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] bitintlower: Handle PAREN_EXPR [PR117459]

2024-11-19 Thread Richard Biener
On Tue, 19 Nov 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following patch handles PAREN_EXPR in bitint lowering, and handles it
> as an optimization barrier, so that temporary arithmetics from PAREN_EXPR
> isn't mixed with temporary arithmetics from outside of the PAREN_EXPR.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2024-11-19  Jakub Jelinek  
> 
>   PR middle-end/117459
>   * gimple-lower-bitint.cc (bitint_large_huge::handle_stmt,
>   bitint_large_huge::lower_stmt): Handle PAREN_EXPR.
> 
>   * gcc.dg/torture/bitint-74.c: New test.
> 
> --- gcc/gimple-lower-bitint.cc.jj 2024-11-18 15:37:35.344738882 +0100
> +++ gcc/gimple-lower-bitint.cc2024-11-18 17:25:19.795445544 +0100
> @@ -2143,6 +2143,7 @@ bitint_large_huge::handle_stmt (gimple *
>   idx),
>   gimple_assign_rhs2 (stmt), idx);
>   case SSA_NAME:
> + case PAREN_EXPR:
>   case INTEGER_CST:
> return handle_operand (gimple_assign_rhs1 (stmt), idx);
>   CASE_CONVERT:
> @@ -5609,7 +5610,9 @@ bitint_large_huge::lower_stmt (gimple *s
>|| gimple_store_p (stmt)
>|| gimple_assign_load_p (stmt)
>|| eq_p
> -  || mergeable_cast_p)
> +  || mergeable_cast_p
> +  || (is_gimple_assign (stmt)
> +   && gimple_assign_rhs_code (stmt) == PAREN_EXPR))
>  {
>lhs = lower_mergeable_stmt (stmt, cmp_code, cmp_op1, cmp_op2);
>if (!eq_p)
> --- gcc/testsuite/gcc.dg/torture/bitint-74.c.jj   2024-11-18 
> 17:39:23.611656932 +0100
> +++ gcc/testsuite/gcc.dg/torture/bitint-74.c  2024-11-18 17:40:07.027050485 
> +0100
> @@ -0,0 +1,27 @@
> +/* PR middle-end/117459 */
> +/* { dg-do run { target bitint } } */
> +/* { dg-options "-std=c23" } */
> +/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
> +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
> +
> +#if __BITINT_MAXWIDTH__ >= 255
> +_BitInt(255) b, c, d;
> +
> +_BitInt(255)
> +foo ()
> +{
> +  return __builtin_assoc_barrier (b + c) + d;
> +}
> +#endif
> +
> +int
> +main ()
> +{
> +#if __BITINT_MAXWIDTH__ >= 255
> +  b = 
> 3162082328713384445049140446737468449630746270013462291267283007210433157591wb;
> +  c = 
> 1299851477887328635550261966833804427562203752161174274777867442907371807wb;
> +  d = 
> 5016523343681809792116154509287659112784399275423992541459788346980443294044wb;
> +  if (foo () != 
> 21177121227873081565800845217991961366842707749189616007001849221633783823442wb)
> +__builtin_abort ();
> +#endif
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] v2: Add -f{, no-}assume-sane-operators-new-delete options [PR110137]

2024-11-19 Thread Richard Biener
On Tue, 19 Nov 2024, Jakub Jelinek wrote:

> On Tue, Nov 19, 2024 at 11:23:31AM +0100, Jakub Jelinek wrote:
> > On Tue, Nov 19, 2024 at 10:25:16AM +0100, Richard Biener wrote:
> > > I think it's pretty clear and easy to describe to users what "m " and 
> > > what "mC" do.  But with "pure" this is an odd intermediate state.  For 
> > > both
> > > "m " and "mP" you suggest above the new/delete might modify their
> > > global state but as you can't rely on the new/delete pair to prevail
> > > you cannot rely on the modification to happen.  But how do you explain
> > > that
> > 
> > If we are willing to make the default not strictly conforming (i.e.
> > basically revert PR101480 by default and make the GCC 11.1/11.2 behavior
> > the default and allow -fno-sane-operators-new-delete to change to GCC
> > 11.3/14.* behavior), I can live with it.
> > But we need to make the documentation clear that the default is not strictly
> > conforming.
> 
> Here is a modified version of the patch to do that.
> 
> Or do we want to set the default based on -std= option (-std=gnu* implies
> -fassume-sane-operators-new-delete, -std=c++* implies
> -fno-assume-sane-operators-new-delete)?

I don't think we want to do that.  I didn't yet suggest to make -Ofast
to change it, not to introduce a -ffast-math like flag to the frontend(s)
(or a -fstrict).

>  Though, not sure what to do for
> LTO then.

With LTO the complication is going to be there anyway - since the
fnspec isn't on the actual call's function type.  So mixing TUs
with different settings is bogus.  You probably want to mark the
flag Optimization so we have it per function.  We'd then have
to refuse inlining of functions with incompatible
flag_assume_sane_operators_new_delete settings into each other
as well.

Alternatively keep a global setting and have lto-wrapper merge
the flag conservatively.

Richard.

> 
> 2024-11-19  Jakub Jelinek  
> 
>   PR c++/110137
>   PR middle-end/101480
> gcc/
>   * doc/invoke.texi (-fassume-sane-operators-new-delete,
>   -fno-assume-sane-operators-new-delete): Document.
>   * gimple.cc (gimple_call_fnspec): Handle
>   -f{,no-}assume-sane-operators-new-delete.
> gcc/c-family/
>   * c.opt (fassume-sane-operators-new-delete): New option.
> gcc/testsuite/
>   * g++.dg/tree-ssa/pr110137-1.C: New test.
>   * g++.dg/tree-ssa/pr110137-2.C: New test.
>   * g++.dg/tree-ssa/pr110137-3.C: New test.
>   * g++.dg/torture/pr10148.C: Add -fno-assume-sane-operators-new-delete
>   as dg-additional-options.
> 
> --- gcc/doc/invoke.texi.jj2024-11-19 10:23:59.145145887 +0100
> +++ gcc/doc/invoke.texi   2024-11-19 12:07:13.942789378 +0100
> @@ -213,7 +213,9 @@ in the following sections.
>  @item C++ Language Options
>  @xref{C++ Dialect Options,,Options Controlling C++ Dialect}.
>  @gccoptlist{-fabi-version=@var{n}  -fno-access-control
> --faligned-new=@var{n}  -fargs-in-order=@var{n}  -fchar8_t  -fcheck-new
> +-faligned-new=@var{n}  -fargs-in-order=@var{n}
> +-fno-assume-sane-operators-new-delete
> +-fchar8_t  -fcheck-new
>  -fconcepts  -fconstexpr-depth=@var{n}  -fconstexpr-cache-depth=@var{n}
>  -fconstexpr-loop-limit=@var{n}  -fconstexpr-ops-limit=@var{n}
>  -fno-elide-constructors
> @@ -3163,6 +3165,35 @@ but few users will need to override the
>  
>  This flag is enabled by default for @option{-std=c++17}.
>  
> +@opindex fno-assume-sane-operators-new-delete
> +@opindex fassume-sane-operators-new-delete
> +@item -fno-assume-sane-operators-new-delete
> +The C++ standard allows replacing the global @code{new}, @code{new[]},
> +@code{delete} and @code{delete[]} operators, though a lot of C++ programs
> +don't replace them and just use the implementation provided version.
> +Furthermore, the C++ standard allows omitting those calls if they are
> +made from new or delete expressions (and by extension the same is
> +assumed if @code{__builtin_operator_new} or @code{__builtin_operator_delete}
> +functions are used).
> +This option allows control over some optimizations around calls
> +to those operators.
> +With @code{-fassume-sane-operators-new-delete} option GCC may assume that
> +calls to the replaceable global operators from new or delete expressions or
> +from @code{__builtin_operator_new} or @code{__builtin_operator_delete} calls
> +don't read or modify any global variables or variables whose address could
> +escape to the operators (global state; except for @code{errno} for the
> +@code{new} and @code{new[]} operators).
> +This allows most optimizations across those calls and is something that
> +the implementation provided operators satisfy unless @code{malloc}
> +implementation details are observable in the code or unless @code{malloc}
> +hooks are used, but might not be satisfied if a program replaces those
> +operators.  This behavior is enabled by default.
> +With @code{-fno-assume-sane-operators-new-delete} option GCC must
> +assume all these calls (whether from new or delete expressions or called
> +direc

[PATCH][v2] tree-optimization/115825 - improve unroll estimates for volatile accesses

2024-11-19 Thread Richard Biener
The loop unrolling code assumes that one third of all volatile accesses
can be possibly optimized away which is of course not true.  This leads
to excessive unrolling in some cases.  The following tracks the number
of stmts with side-effects as those are not eliminatable later and
only assumes one third of the other stmts can be further optimized.

This causes some fallout in the testsuite where we rely on unrolling
even when calls are involved.  I have XFAILed g++.dg/warn/Warray-bounds-20.C
but adjusted the others with a #pragma GCC unroll to mimic previous
behavior and retain what the testcase was testing.  I've also filed
PR117671 for the case where the size estimation fails to honor the
stmts we then remove by inserting __builtin_unreachable ().
For gcc.dg/tree-ssa/cunroll-2.c the estimate that the code doesn't
grow is clearly bogus and we have explicit code to reject unrolling
for bodies containing calls so I've adjusted the testcase accordingly.

Re-posted with testsuite adjustments (original from July).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

PR tree-optimization/115825
* tree-ssa-loop-ivcanon.cc (loop_size::not_eliminatable_after_peeling):
New.
(loop_size::last_iteration_not_eliminatable_after_peeling): Likewise.
(tree_estimate_loop_size): Count stmts with side-effects as
not optimistically eliminatable.
(estimated_unrolled_size): Compute the number of stmts that can
be optimistically eliminated by followup transforms.
(try_unroll_loop_completely): Adjust.

* gcc.dg/tree-ssa/cunroll-17.c: New testcase.
* gcc.dg/tree-ssa/cunroll-2.c: Adjust to not expect unrolling.
* gcc.dg/pr94600-1.c: Force unrolling.
* c-c++-common/ubsan/unreachable-3.c: Likewise.
* g++.dg/warn/Warray-bounds-20.C: XFAIL cases we rely on
unrolling loops created by new expressions and not inlined
CTOR invocations.
---
 .../c-c++-common/ubsan/unreachable-3.c|  3 +-
 gcc/testsuite/g++.dg/warn/Warray-bounds-20.C  |  6 ++--
 gcc/testsuite/gcc.dg/pr94600-1.c  |  1 +
 gcc/testsuite/gcc.dg/tree-ssa/cunroll-17.c| 11 ++
 gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c |  3 +-
 gcc/tree-ssa-loop-ivcanon.cc  | 35 +++
 6 files changed, 47 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cunroll-17.c

diff --git a/gcc/testsuite/c-c++-common/ubsan/unreachable-3.c 
b/gcc/testsuite/c-c++-common/ubsan/unreachable-3.c
index b7a0d1aa92b..8831a1fb187 100644
--- a/gcc/testsuite/c-c++-common/ubsan/unreachable-3.c
+++ b/gcc/testsuite/c-c++-common/ubsan/unreachable-3.c
@@ -14,8 +14,9 @@ struct snic {
 void snic_log_q_error(struct snic *snic)
 {
 unsigned int i;
+#pragma GCC unroll 1
 for (i = 0; i < snic->wq_count; i++)
-ioread32(&snic->wq[i]->error_status);
+  ioread32(&snic->wq[i]->error_status);
 }
 
 /* { dg-final { scan-tree-dump "__builtin___ubsan_handle_builtin_unreachable" 
"optimized" } } */
diff --git a/gcc/testsuite/g++.dg/warn/Warray-bounds-20.C 
b/gcc/testsuite/g++.dg/warn/Warray-bounds-20.C
index f4876d8a269..5fc55293074 100644
--- a/gcc/testsuite/g++.dg/warn/Warray-bounds-20.C
+++ b/gcc/testsuite/g++.dg/warn/Warray-bounds-20.C
@@ -53,8 +53,8 @@ void warn_derived_ctor_access_new_alloc ()
 
 void warn_derived_ctor_access_new_array_decl ()
 {
-  char b[sizeof (D1) * 2];// { dg-message "at offset \\d+ into object 'b' 
of size 80" "LP64 note" { target lp64 } }
-  // { dg-message "at offset \\d+ into object 'b' 
of size 40" "LP64 note" { target ilp32 } .-1 }
+  char b[sizeof (D1) * 2];// { dg-message "at offset \\d+ into object 'b' 
of size 80" "LP64 note" { target { lp64 } xfail { lp64 } } }
+  // { dg-message "at offset \\d+ into object 'b' 
of size 40" "LP64 note" { target { ilp32 } xfail { ilp32 } } .-1 }
   char *p = b;
   ++p;
   D1 *q = new (p) D1[2];
@@ -63,7 +63,7 @@ void warn_derived_ctor_access_new_array_decl ()
 
 void warn_derived_ctor_access_new_array_alloc ()
 {
-  char *p = new char[sizeof (D1) * 2];// { dg-message "at offset 
\\d+ into object of size \\d+ allocated by '\[^\n\r]*operator new\[^\n\r]*" 
"note" }
+  char *p = new char[sizeof (D1) * 2];// { dg-message "at offset 
\\d+ into object of size \\d+ allocated by '\[^\n\r]*operator new\[^\n\r]*" 
"note" { xfail *-*-* } }
   ++p;
   D1 *q = new (p) D1[2];
   sink (q);
diff --git a/gcc/testsuite/gcc.dg/pr94600-1.c b/gcc/testsuite/gcc.dg/pr94600-1.c
index 149e4f35dbe..d5fb4d169c4 100644
--- a/gcc/testsuite/gcc.dg/pr94600-1.c
+++ b/gcc/testsuite/gcc.dg/pr94600-1.c
@@ -31,6 +31,7 @@ foo(void)
 {
   __SIZE_TYPE__ i;
   __SIZE_TYPE__ base = 0x000a;
+#pragma GCC unroll 5
   for (i = 0; i < (sizeof (a0) / sizeof ((a0)[0])); i++) {
 *(volatile t0 *) (base + 44 + i * 4) = a0[i];
   }
diff --git a/gcc/testsuite/gcc.dg/tre

[COMMITTED 3/3] ada: Rename Within_Case_Or_If_Expression predicate

2024-11-19 Thread Marc Poulhiès
From: Eric Botcazou 

The case and if expressions are exactly the conditional expressions.

gcc/ada/ChangeLog:

* exp_util.ads (Within_Case_Or_If_Expression): Rename into...
(Within_Conditional_Expression): ...this.
* exp_util.adb (Within_Case_Or_If_Expression): Rename into...
(Within_Conditional_Expression): ...this.
* checks.adb (Install_Null_Excluding_Check): Adjust for renaming.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/checks.adb   |  2 +-
 gcc/ada/exp_util.adb | 12 ++--
 gcc/ada/exp_util.ads |  6 +++---
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/gcc/ada/checks.adb b/gcc/ada/checks.adb
index 8a3c4e8b4bf..1ec49924c9b 100644
--- a/gcc/ada/checks.adb
+++ b/gcc/ada/checks.adb
@@ -8427,7 +8427,7 @@ package body Checks is
--  where the expression might not be evaluated, and the warning
--  appear as extraneous noise.
 
-   and then not Within_Case_Or_If_Expression (N)
+   and then not Within_Conditional_Expression (N)
  then
 Apply_Compile_Time_Constraint_Error
   (N, "null value not allowed here??", CE_Access_Check_Failed);
diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
index e4397fe868d..149be620b1b 100644
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -6702,7 +6702,7 @@ package body Exp_Util is
   --  then we need to insert at the appropriate (inner) location in
   --  the not as an action on Node_To_Be_Wrapped.
 
-  In_Cond_Expr : constant Boolean := Within_Case_Or_If_Expression (N);
+  In_Cond_Expr : constant Boolean := Within_Conditional_Expression (N);
 
begin
   --  When the node is inside a case/if expression, the lifetime of any
@@ -14632,11 +14632,11 @@ package body Exp_Util is
   Map_Types (Parent_Type, Derived_Type);
end Update_Primitives_Mapping;
 
-   --
-   -- Within_Case_Or_If_Expression --
-   --
+   ---
+   -- Within_Conditional_Expression --
+   ---
 
-   function Within_Case_Or_If_Expression (N : Node_Id) return Boolean is
+   function Within_Conditional_Expression (N : Node_Id) return Boolean is
   Nod : Node_Id;
   Par : Node_Id;
 
@@ -14680,7 +14680,7 @@ package body Exp_Util is
   end loop;
 
   return False;
-   end Within_Case_Or_If_Expression;
+   end Within_Conditional_Expression;
 
--
-- Predicate_Check_In_Scope --
diff --git a/gcc/ada/exp_util.ads b/gcc/ada/exp_util.ads
index 0872db21491..751fb5b31e0 100644
--- a/gcc/ada/exp_util.ads
+++ b/gcc/ada/exp_util.ads
@@ -1283,11 +1283,11 @@ package Exp_Util is
--  when elaborating a contract for a subprogram, and when freezing a type
--  extension to verify legality rules on inherited conditions.
 
-   function Within_Case_Or_If_Expression (N : Node_Id) return Boolean;
+   function Within_Conditional_Expression (N : Node_Id) return Boolean;
--  Determine whether arbitrary node N is immediately within a dependent
-   --  expression of a case or an if expression. The criterion is whether
+   --  expression of a conditional expression. The criterion is whether
--  temporaries created by the actions attached to N need to outlive an
-   --  enclosing case or if expression.
+   --  enclosing conditional expression.
 
 private
pragma Inline (Duplicate_Subexpr);
-- 
2.43.0



Re: [PATCH v2] RISC-V: Tie MUL and DIV masks to the M extension

2024-11-19 Thread Jeff Law




On 11/15/24 8:31 AM, Dimitar Dimitrov wrote:

When configuring GCC for RV32EC with:
   ./configure \
   --target=riscv32-none-elf   \
   --with-multilib-generator="rv32ec-ilp32e--" \
   --with-abi=ilp32e   \
   --with-arch=rv32ec

Then the build fails because division is erroneously left enabled:
cc1: error: '-mdiv' requires '-march' to subsume the 'M' extension
-fself-test: 8412281 pass(es) in 0.647173 seconds

Fix by disabling MASK_DIV if multiplication is not available and -mdiv
option has not been explicitly passed.

Tested the above RV32EC-only toolchain using the GNU simulator:
  === gcc Summary ===

  # of expected passes211635
  # of unexpected failures3004
  # of expected failures  1061
  # of unresolved testcases   5651
  # of unsupported tests  18958

The high number of failures is due to unrelated testsuite issue PR117603.

Ok for trunk once CI is green?

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_override_options_internal):
Set division option's default to disabled if multiplication
is not available.

Signed-off-by: Dimitar Dimitrov 
---
Changes in v2:
   - Do not forcefully tie MUL and DIV in riscv_ext_flag_table.  That
 causes the gcc.target/riscv/predef-7.c case to fail when M
 extension is available but -mno-div option is explicitly set:
   ... -march=rv32em -mabi=ilp32e -mno-div ...

OK
jeff



Re: [PATCH v2 04/14] tree-phinodes: Use 4 instead of 2 as the minimum number of phi args

2024-11-19 Thread Richard Biener
On Sun, Nov 17, 2024 at 4:28 AM Lewis Hyatt  wrote:
>
> Currently, when we allocate a gphi object, we round up the capacity for the
> trailing arguments array such that it will make full use of the page size
> that ggc will allocate. While there is also an explicit minimum of 2
> arguments, in practice after rounding to the ggc page size there is always
> room for at least 4.
>
> It seems we have some code that has come to depend on there being this much
> room before reallocation of a PHI is required. For example, the function
> loop_version () used during loop optimization will make sure there is room
> for an additional edge on each PHI that it processes. But there are call
> sites which cache a PHI pointer prior to calling loop_version () and assume
> it remains valid afterward, thus implicitly assuming that the PHI will have
> spare capacity. Examples include split_loop () and gen_parallel_loop ().
>
> This works fine now, but if the size of a gphi becomes larger, e.g. due to
> configuring location_t to be a 64-bit type, then on 32-bit platforms it ends
> up being possible to get a gphi with only 2 arguments of capacity, causing
> the above call sites of loop_version () to fail. (They store a pointer to a
> gphi object that no longer has the same meaning it did before it got
> reallocated.) The testcases gcc.dg/torture/pr113707-2.c and
> gcc.dg/graphite/pr81945.c exhibit that failure mode.
>
> It may be necessary to adjust those call sites to make this more robust, but
> in the meantime, changing the minimum from 2 to 4 does no harm given the
> minimum is practically 4 anyway, and it resolves the issue for 32-bit
> platforms.

We need to fix the users.  Note ideal_phi_node_len rounds up to a power of two
but extra_order_size_table also has MAX_ALIGNMENT * n with n from 1 to 16
buckets, so such extensive rounding up is not needed.

The cache is also quite useless this way (I didn't fix this when last working
there).

Richard.

> gcc/ChangeLog:
>
> * tree-phinodes.cc (MIN_PHI_ARGS): New constant.
> (allocate_phi_node): Change from hard-coded value 2 to MIN_PHI_ARGS,
> which is now 4.
> (ideal_phi_node_len): Likewise.
> (release_phi_node): Likewise.
> ---
>  gcc/tree-phinodes.cc | 24 
>  1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/tree-phinodes.cc b/gcc/tree-phinodes.cc
> index 5a7e4a94e57..9d8e16ac200 100644
> --- a/gcc/tree-phinodes.cc
> +++ b/gcc/tree-phinodes.cc
> @@ -63,11 +63,12 @@ along with GCC; see the file COPYING3.  If not see
> walking the elements of the last array entry would result in finding less
> than .1% additional reusable PHI nodes.
>
> -   Note that we can never have less than two PHI argument slots.  Thus,
> -   the -2 on all the calculations below.  */
> +   Note that we can never have less than MIN_PHI_ARGS argument slots.  Thus,
> +   the subtraction of MIN_PHI_ARGS on all the calculations below.  */
>
>  #define NUM_BUCKETS 10
> -static GTY ((deletable (""))) vec 
> *free_phinodes[NUM_BUCKETS - 2];
> +#define MIN_PHI_ARGS 4
> +static GTY ((deletable (""))) vec 
> *free_phinodes[NUM_BUCKETS - MIN_PHI_ARGS];
>  static unsigned long free_phinode_count;
>
>  static int ideal_phi_node_len (int);
> @@ -94,17 +95,18 @@ static inline gphi *
>  allocate_phi_node (size_t len)
>  {
>gphi *phi;
> -  size_t bucket = NUM_BUCKETS - 2;
> +  size_t bucket = NUM_BUCKETS - MIN_PHI_ARGS;
>size_t size = sizeof (struct gphi)
> + (len - 1) * sizeof (struct phi_arg_d);
>
>if (free_phinode_count)
> -for (bucket = len - 2; bucket < NUM_BUCKETS - 2; bucket++)
> +for (bucket = len - MIN_PHI_ARGS; bucket < NUM_BUCKETS - MIN_PHI_ARGS;
> +bucket++)
>if (free_phinodes[bucket])
> break;
>
>/* If our free list has an element, then use it.  */
> -  if (bucket < NUM_BUCKETS - 2
> +  if (bucket < NUM_BUCKETS - MIN_PHI_ARGS
>&& gimple_phi_capacity ((*free_phinodes[bucket])[0]) >= len)
>  {
>free_phinode_count--;
> @@ -145,9 +147,8 @@ ideal_phi_node_len (int len)
>size_t size, new_size;
>int log2, new_len;
>
> -  /* We do not support allocations of less than two PHI argument slots.  */
> -  if (len < 2)
> -len = 2;
> +  /* We do not support allocations of less than MIN_PHI_ARGS argument slots. 
>  */
> +  len = MAX (len, MIN_PHI_ARGS);
>
>/* Compute the number of bytes of the original request.  */
>size = sizeof (struct gphi)
> @@ -225,14 +226,13 @@ release_phi_node (gimple *phi)
>
>/* Immediately return the memory to the allocator when we would
>   only ever re-use it for a smaller size allocation.  */
> -  if (len - 2 >= NUM_BUCKETS - 2)
> +  if (len >= NUM_BUCKETS)
>  {
>ggc_free (phi);
>return;
>  }
>
> -  bucket = len > NUM_BUCKETS - 1 ? NUM_BUCKETS - 1 : len;
> -  bucket -= 2;
> +  bucket = len - MIN_PHI_ARGS;
>vec_safe_push (free_phinodes[bucket], phi);
>free_phinode_cou

Re: Should -fsanitize=bounds support counted-by attribute for pointers inside a structure?

2024-11-19 Thread Martin Uecker
Am Dienstag, dem 19.11.2024 um 09:18 -0800 schrieb Kees Cook:
> On Tue, Nov 19, 2024 at 05:41:13PM +0100, Martin Uecker wrote:
> > Am Dienstag, dem 19.11.2024 um 10:47 -0500 schrieb Marek Polacek:
> > > On Mon, Nov 18, 2024 at 07:10:35PM +0100, Martin Uecker wrote:
> > > > Am Montag, dem 18.11.2024 um 17:55 + schrieb Qing Zhao:
> > > > > Hi,
> > > > > 
> > > > > I am working on extending “counted_by” attribute to pointers inside a 
> > > > > structure per our previous discussion. 
> > > > > 
> > > > > I need advice on the following question:
> > > > > 
> > > > > Should -fsanitize=bounds support array reference that was referenced 
> > > > > through a pointer that has counted_by attribute? 
> > > 
> > > I don't see why it couldn't, perhaps as part of -fsanitize=bounds-strict.
> > > Someone has to implement it, though.
> > 
> > I think Qing was volunteering to do this.  My point was that
> > this would not necessarily be undefined behavior, but instead
> > could trap for possibly defined behavior.  I would not mind, but
> > I point out that in the past people insisted that the sanitizers
> > are only intended to screen for undefined behavior.
> 
> I think it's a mistake to confuse the sanitizers with only addressing
> "undefined behavior". The UB sanitizers are just a subset of the
> sanitizers in general, and I think UB is not a good category for how
> to group the behaviors.
> 
> For the Linux kernel, we want robustness. UB leads to ambiguity, so
> we're quite interested in getting rid of UB, but the bounds sanitizer is
> expected to implement bounds checking, regardless of UB-ness.
> 
> I would absolutely want -fsanitize=bounds to check the construct Qing
> mentioned.
> 
> Another aspect I want to capture for Linux is _pointer_ bounds, so that
> this would be caught:
> 
> #include <stdlib.h>
> 
> struct annotated {
>   int b;
>   int *c __attribute__ ((counted_by (b)));
> } *p_array_annotated;
> 
> void __attribute__((__noinline__)) setup (int annotated_count)
> {
>   p_array_annotated
> = (struct annotated *)malloc (sizeof (struct annotated));
>   p_array_annotated->c = (int *) malloc (annotated_count *  sizeof (int));
>   p_array_annotated->b = annotated_count;
> 
>   return;
> }
> 
> int main(int argc, char *argv[])
> {
>   int i;
>   int *c;
> 
>   setup (10);
>   c = p_array_annotated->c;
>   for (i = 0; i < 11; i++)
> *c++ = 2; // goes boom at i == 10
>   return 0;
> }
> 
> This may be a separate sanitizer, and it may require a totally different
> set of internal tracking, but being able to discover that we've run off
> the end of an allocation is needed.
> 
> Of course, the biggest deal is that
> __builtin_dynamic_object_size(p_array_annotated->c, 1) will return
> 10 * sizeof(*p_array_annotated->c)

I want this too. The plan preliminarily discussed in WG14 is to
have a proper language extension for this, tentatively:

struct foo {
  int n;
  char (*p)[.n];
};

(details to change, the syntax is what I would like to have)



> > 
> > >  
> > > > I think the question is what -fsanitize=bounds is meant to be.
> > > > 
> > > > I am a bit frustrated about the sanitizer.  On the
> > > > one hand, it is not doing enough to get spatial memory
> > > > safety even where this would be easily possible, on the
> > > > other hand, is pedantic about things which are technically
> > > > UB but not problematic and then one is prevented from
> > > > using it
> > > > 
> > > > When used in default mode, where execution continues, it
> > > > also does not mix well with many warnings, creates more code,
> > > > and pulls in a library dependency (and the library also depends
> > > > on upstream choices / progress which seems a limitation for
> > > > extensions).
> > > > 
> > > > What IMHO would be ideal is a protection mode for spatial
> > > > memory safety that simply adds traps (which then requires
> > > > no library, has no issues with other warnings, and could
> > > > evolve independently from clang) 
> > > > 
> > > > So shouldn't we just add a -fboundscheck (which would 
> > > > be like -fsanitize=bounds -fsanitize-trap=bounds just with
> > > > more checking) and make it really good? I think many people
> > > > would be very happy about this.
> > > 
> > > That's a separate concern.  We already have the -fbounds-check option,
> > > currently only used in Fortran (and D?), so perhaps we could make
> > > that option a shorthand for -fsanitize=bounds -fsanitize-trap=bounds.
> > 
> > I think it could share large parts of the implementation, but the
> > main reason for having a separate option would be to do something
> > better than the sanitizer.  So it could not simply be a shorthand.
> 
> I don't want to reinvent the wheel here -- the sanitizers already have 3
> modes of operation (trap, callback with details, callback without
> details), and Linux uses the first 2 modes already, and has had plans to
> use the third (smaller resulting image).
> 
> Most notably, Linux _must_ have a warn-only mode or the feature will
> ne

Re: [PATCH 13/17] testsuite: arm: Use -march=unset for cortex-m55* tests

2024-11-19 Thread Richard Earnshaw (lists)
On 19/11/2024 10:24, Torbjörn SVENSSON wrote:
> Update test cases to use -mcpu=unset/-march=unset feature introduced in
> r15-3606-g7d6c6a0d15c.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/cortex-m55-nodsp-flag-hard.c: Added option
>   "-march=unset".
>   * gcc.target/arm/cortex-m55-nodsp-flag-softfp.c: Likewise.
>   * gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c: Likewise.
>   * gcc.target/arm/cortex-m55-nofp-flag-hard.c: Likewise.
>   * gcc.target/arm/cortex-m55-nofp-flag-softfp.c: Likewise.
>   * gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c: Likewise.
>   * gcc.target/arm/cortex-m55-nomve-flag-hard.c: Likewise.
>   * gcc.target/arm/cortex-m55-nomve-flag-softfp.c: Likewise.
>   * gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c: Likewise.
>   * gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c: Likewise.
> 
> Signed-off-by: Torbjörn SVENSSON 

OK (I don't think it's worth adding entries to the CPU table for these).

R.

> ---
>  gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c   | 2 +-
>  gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c | 2 +-
>  .../gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c  | 2 +-
>  gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c| 2 +-
>  gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-softfp.c  | 2 +-
>  .../gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c  | 2 +-
>  gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-hard.c   | 2 +-
>  gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-softfp.c | 2 +-
>  gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c| 2 +-
>  gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c  | 2 +-
>  10 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c 
> b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c
> index 92c15112ae7..9810e28f58d 100644
> --- a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c
> +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c
> @@ -1,6 +1,6 @@
>  /* { dg-do assemble } */
>  /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> -/* { dg-additional-options "-mcpu=cortex-m55+nodsp -mthumb -mfloat-abi=hard 
> -mfpu=auto --save-temps" } */
> +/* { dg-additional-options "-march=unset -mcpu=cortex-m55+nodsp -mthumb 
> -mfloat-abi=hard -mfpu=auto --save-temps" } */
>  /* { dg-final { scan-assembler "\.arch_extension fp" } } */
>  /* { dg-final { scan-assembler "\.arch_extension fp.dp" } } */
>  /* { dg-final { scan-assembler-not "\.arch_extension dsp" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c 
> b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c
> index 89d778f8ecb..cc92d8ccc60 100644
> --- a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c
> +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c
> @@ -1,6 +1,6 @@
>  /* { dg-do assemble } */
>  /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> -/* { dg-additional-options "-mcpu=cortex-m55+nodsp -mthumb 
> -mfloat-abi=softfp -mfpu=auto --save-temps" } */
> +/* { dg-additional-options "-march=unset -mcpu=cortex-m55+nodsp -mthumb 
> -mfloat-abi=softfp -mfpu=auto --save-temps" } */
>  /* { dg-final { scan-assembler "\.arch_extension fp" } } */
>  /* { dg-final { scan-assembler "\.arch_extension fp.dp" } } */
>  /* { dg-final { scan-assembler-not "\.arch_extension dsp" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c 
> b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c
> index 405090ca9e9..11d4634a30f 100644
> --- a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c
> +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c
> @@ -1,6 +1,6 @@
>  /* { dg-do assemble } */
>  /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> -/* { dg-additional-options "-mcpu=cortex-m55+nodsp+nofp -mthumb 
> -mfloat-abi=softfp -mfpu=auto --save-temps" } */
> +/* { dg-additional-options "-march=unset -mcpu=cortex-m55+nodsp+nofp -mthumb 
> -mfloat-abi=softfp -mfpu=auto --save-temps" } */
>  /* { dg-final { scan-assembler-not "\.arch_extension fp" } } */
>  /* { dg-final { scan-assembler-not "\.arch_extension fp.dp" } } */
>  /* { dg-final { scan-assembler-not "\.arch_extension dsp" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c 
> b/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c
> index e0fb307ac30..bed231fc711 100644
> --- a/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c
> +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c
> @@ -1,6 +1,6 @@
>  /* { dg-do assemble } */
>  /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> -/* { dg-additional-options "-mcpu=cortex-m55+nofp -mthumb -mfloat-abi=hard 
> -mfpu=auto --save-temps" } */
> +/* { dg-additional-options "-march=unset -mcpu=cortex-m55+nofp 

[PATCH] RISC-V: testsuite: restrict big endian test to non vector

2024-11-19 Thread Edwin Lu
RISC-V vector currently does not support big endian so the postcommit
was getting the sorry, not implemented error on vector targets. Restrict
the testcase to non-vector targets

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr117595.c: Restrict to non vector targets.

Signed-off-by: Edwin Lu 
---
 gcc/testsuite/gcc.target/riscv/pr117595.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/riscv/pr117595.c 
b/gcc/testsuite/gcc.target/riscv/pr117595.c
index a870df08ee4..156b9388d6e 100644
--- a/gcc/testsuite/gcc.target/riscv/pr117595.c
+++ b/gcc/testsuite/gcc.target/riscv/pr117595.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target { ! riscv_v } } } */
 /* { dg-options "-mbig-endian" } */
 
 _Atomic enum { E0 } e;
-- 
2.34.1



Re: [RFC PATCH 1/5] vect: Force alignment peeling to vectorize more early break loops

2024-11-19 Thread Richard Sandiford
Alex Coplan  writes:
> On 19/11/2024 17:02, Richard Sandiford wrote:
>> Sorry for the slow review.  Finally catching up on backlog.
>> 
>> Richard Biener  writes:
>> > On Mon, 28 Oct 2024, Alex Coplan wrote:
>> >
>> >> This allows us to vectorize more loops with early exits by forcing
>> >> peeling for alignment to make sure that we're guaranteed to be able to
>> >> safely read an entire vector iteration without crossing a page boundary.
>> >> 
>> >> To make this work for VLA architectures we have to allow compile-time
>> >> non-constant target alignments.  We also have to override the result of
>> >> the target's preferred_vector_alignment hook if it isn't a power-of-two
>> >> multiple of the TYPE_SIZE of the chosen vector type.
>> >> 
>> >> There is currently an implicit assumption that the TYPE_SIZE of the
>> >> vector type is itself a power of two.  For non-VLA types this
>> >> could be checked directly in the vectorizer.  For VLA types I
>> >> had discussed offline with Richard S about adding a target hook to allow
>> >> the vectorizer to query the backend to confirm that a given VLA type
>> >> is known to have a power-of-two size at runtime.
>> >
>> > GCC assumes all vectors have power-of-two size, so I don't think we
>> > need to check anything but we'd instead have to make sure the
>> > target constrains the hardware when this assumption doesn't hold
>> > in silicon.
>> 
>> We did at one point support non-power-of-2 for VLA only.  But things
>> might have crept in since that break it even for VLA.  It's no longer
>> something that matters for SVE because the architecture has been
>> tightened to remove the non-power-of-2 option.
>> 
>> My main comment on the patch is about:
>> 
>> +  /* Below we reject compile-time non-constant target alignments, but if
>> + our misalignment is zero, then we are known to already be aligned
>> + w.r.t. any such possible target alignment.  */
>> +  if (known_eq (misalignment, 0))
>> +return 0;
>> 
>> When is that true for VLA?  It seems surprising that we can guarantee
>> alignment to an unknown boundary :)  However, I agree that it's the
>> natural consequence of the formula.
>
> My vague memory is that the alignment peeling machinery forces the
> dr_info->misalignment to 0 after we've decided to peel for alignment
> (for DRs which we know we will have made aligned by peeling).  So the
> check is designed to handle that case.

Ah, yeah, of course.  Sorry for the dumb question.  I'd forgotten that
that was what the misalignment represented here, rather than the
incoming/"natural" misalignment.

Thanks,
Richard


Re: [PATCH] RISC-V: testsuite: restrict big endian test to non vector

2024-11-19 Thread Jeff Law




On 11/19/24 2:08 PM, Edwin Lu wrote:

RISC-V vector currently does not support big endian so the postcommit
was getting the sorry, not implemented error on vector targets. Restrict
the testcase to non-vector targets

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr117595.c: Restrict to non vector targets.

OK
jeff



Re: [to-be-committed][RISC-V][PR target/117649] Fix branch on masked values splitter

2024-11-19 Thread Jeff Law




On 11/19/24 12:01 PM, Jeff Law wrote:
Andreas reported GCC mis-compiled GAS for risc-v.  Thankfully he also 
reduced it to a nice little testcase.


So the whole point of the pattern in question is to "reduce" the 
constants by right shifting away common unnecessary bits in RTL 
expressions like this:



  [(set (pc)
    (if_then_else (any_eq
    (and:ANYI (match_operand:ANYI 1 "register_operand" 
"r")
  (match_operand 2 
"shifted_const_arith_operand" "i"))

    (match_operand 3 "shifted_const_arith_operand" "i"))
 (label_ref (match_operand 0 "" ""))
 (pc)))



When applicable, the reduced constants in operands 2/3 fit into a simm12 
and thus do not need multi-instruction synthesis.  Note that we have to 
also shift operand 1.


That shift should have been an arithmetic shift, but was incorrectly 
coded as a logical shift.


Fixed with the obvious change on the right shift opcode.

Expecting to push to the trunk once the pre-commit tester renders its 
verdict.  I've already tested this in my tester for rv32 and rv64.
If you look at the patch, it's got a hunk from a different bugfix :-) 
Obviously that'll need to be addressed.


jeff


Re: [PATCH] expr, c, gimplify, v3: Don't clear whole unions [PR116416]

2024-11-19 Thread Joseph Myers
On Tue, 15 Oct 2024, Jakub Jelinek wrote:

> --- gcc/testsuite/gcc.dg/gnu11-empty-init-1.c.jj  2024-10-15 
> 16:14:23.411063701 +0200
> +++ gcc/testsuite/gcc.dg/gnu11-empty-init-1.c 2024-10-15 16:31:02.302984714 
> +0200
> @@ -0,0 +1,199 @@
> +/* Test GNU C11 support for empty initializers.  */
> +/* { dg-do run } */
> +/* { dg-options "-std=gnu23" } */

All these gnu11-*.c tests are using -std=gnu23, which doesn't make sense.  
If they're meant to test what GCC does in C11 mode, use -std=gnu11; if 
they're meant to use -std=gnu23, name them gnu23-*.c.  (In either case, 
the tests might, as now, also have -fzero-init-padding-bits= options when 
that's part of what they're meant to test.)

-- 
Joseph S. Myers
josmy...@redhat.com



[COMMITTED 2/3] ada: Small fix in expansion of array aggregates handled by the back end

2024-11-19 Thread Marc Poulhiès
From: Eric Botcazou 

The (minimal) expansion is now done by Build_Array_Aggr_Code in all cases,
which means that it must prevent the aggregate from being re-analyzed as
the RHS of the assignment, which may trigger a bogus warning and lead to
another useless rewriting.

The change also inlines Build_Assignment_With_Temporary that is now called
only once by Build_Array_Aggr_Code for this processing.

gcc/ada/ChangeLog:

* exp_aggr.adb (Build_Assignment_With_Temporary): Inline into...
(Build_Array_Aggr_Code): ...this.  Set the Analyzed flag on the
relocated aggregate if it is to be handled by the back-end.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb | 71 
 1 file changed, 25 insertions(+), 46 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index c34df840401..a82705dca3f 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -78,13 +78,6 @@ with Warnsw; use Warnsw;
 
 package body Exp_Aggr is
 
-   function Build_Assignment_With_Temporary
- (Target : Node_Id;
-  Typ: Entity_Id;
-  Source : Node_Id) return List_Id;
-   --  Returns a list of actions to assign Source to Target of type Typ using
-   --  an extra temporary, which can potentially be large.
-
type Case_Bounds is record
  Choice_Lo   : Node_Id;
  Choice_Hi   : Node_Id;
@@ -1925,8 +1918,8 @@ package body Exp_Aggr is
--  Start of processing for Build_Array_Aggr_Code
 
begin
-  --  If the assignment can be done directly by the back end, then reset
-  --  the Set_Expansion_Delayed flag and do not expand further.
+  --  If the assignment can be done directly by the back end, then expand
+  --  into an assignment statement.
 
   if Present (Etype (N))
 and then Aggr_Assignment_OK_For_Backend (N)
@@ -1940,7 +1933,13 @@ package body Exp_Aggr is
  (if Nkind (Into) = N_Unchecked_Type_Conversion
   then Expression (Into)
   else Into);
+
+Temp : Node_Id;
+
  begin
+--  Block any further processing of the aggregate by the front end
+
+Set_Analyzed (New_Aggr);
 Set_Expansion_Delayed (New_Aggr, False);
 
 --  In the case where the target is the dereference of a prefix
@@ -1956,7 +1955,23 @@ package body Exp_Aggr is
  (Storage_Model_Object
(Etype (Prefix (Target)
 then
-   return Build_Assignment_With_Temporary (Into, Typ, New_Aggr);
+   Temp := Build_Temporary_On_Secondary_Stack (Loc, Typ, New_Code);
+
+   Append_To (New_Code,
+ Make_OK_Assignment_Statement (Loc,
+   Name   =>
+ Make_Explicit_Dereference (Loc,
+   Prefix => New_Occurrence_Of (Temp, Loc)),
+   Expression => New_Aggr));
+
+   Append_To (New_Code,
+ Make_OK_Assignment_Statement (Loc,
+   Name   => Target,
+   Expression =>
+ Make_Explicit_Dereference (Loc,
+   Prefix => New_Occurrence_Of (Temp, Loc;
+
+   return New_Code;
 
 else
return New_List (
@@ -2159,42 +2174,6 @@ package body Exp_Aggr is
   return New_Code;
end Build_Array_Aggr_Code;
 
-   -------------------------------------
-   -- Build_Assignment_With_Temporary --
-   -------------------------------------
-
-   function Build_Assignment_With_Temporary
- (Target : Node_Id;
-  Typ: Entity_Id;
-  Source : Node_Id) return List_Id
-   is
-  Loc : constant Source_Ptr := Sloc (Source);
-
-  Aggr_Code : List_Id;
-  Tmp   : Entity_Id;
-
-   begin
-  Aggr_Code := New_List;
-
-  Tmp := Build_Temporary_On_Secondary_Stack (Loc, Typ, Aggr_Code);
-
-  Append_To (Aggr_Code,
-Make_OK_Assignment_Statement (Loc,
-  Name   =>
-Make_Explicit_Dereference (Loc,
-  Prefix => New_Occurrence_Of (Tmp, Loc)),
-  Expression => Source));
-
-  Append_To (Aggr_Code,
-Make_OK_Assignment_Statement (Loc,
-  Name   => Target,
-  Expression =>
-Make_Explicit_Dereference (Loc,
-  Prefix => New_Occurrence_Of (Tmp, Loc))));
-
-  return Aggr_Code;
-   end Build_Assignment_With_Temporary;
-

-- Build_Record_Aggr_Code --

-- 
2.43.0



Re: [PATCH 3/3] AArch64: Add SVE vector cost to baseline tuning

2024-11-19 Thread Richard Sandiford
Kyrylo Tkachov  writes:
>> On 15 Nov 2024, at 12:33, Wilco Dijkstra  wrote:
>> 
>> Hi Kyrill,
>> 
>>> This would make USE_NEW_VECTOR_COSTS effectively the default.
>>> Jennifer has been trying to do that as well and then to remove it (as it 
>>> would be always true) but there are some codegen regressions that still
>>> need to be addressed.
>> 
>> Yes, that's the goal - we should use good tuning settings by default, 
>> especially if
>> they work well on modern cores. I noticed a huge gap between 
>> -mcpu=neoverse-v2
>> and -march=armv9-a, so the idea is to make the tunings more similar. Note 
>> this
>> particular patch won't make a difference since both of these tunings already 
>> use the
>> new vector costs and throughput setting.
>> 
>>> See the threads “[RFC][PATCH] AArch64: Remove 
>>> AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS” from October and September.
>>> Do those regressions go away if you also specify 
>>> AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT at the same time?
>> 
>> I believe we always use both of those settings together. Removing the 
>> settings by
>> making them the default looks like a good idea indeed. We have too many tune
>> settings...
>
> In principle the only SVE-enabled SVE core that 
> AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT wouldn’t apply for is A64FX but 
> that tuning was also not validated with 
> AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS so indeed in all current uses they 
> appear together.
> I wouldn’t mind assuming AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT in the 
> generic tuning if others agree,

It looks like we already do that for generic_armv8.h and generic_armv9.h,
which are the ones that people would use in practice.  IMO we should
probably leave it out of "generic" itself, since like you say, A64FX is
a good example of why the flag is less likely to hold for long SVE vectors.

> but I don’t think we should remove the ! 
> AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT paths just yet.

Yeah, I agree we should keep AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
tunable.

Thanks,
Richard


Re: [PATCH v2 2/4] vect: disable multiple calls of poly simdclones

2024-11-19 Thread Victor Do Nascimento
Richard Biener  writes:

> On Mon, 18 Nov 2024, Victor Do Nascimento wrote:
>
>> On 11/5/24 07:39, Richard Biener wrote:
>> > On Tue, 5 Nov 2024, Victor Do Nascimento wrote:
>> > 
>> >> The current codegen code to support VF's that are multiples of a simdclone
>> >> simdlen relies on BIT_FIELD_REF to create multiple input vectors.  This does
>> >> not
>> >> work for non-constant simdclones, so we should disable using such clones
>> >> when
>> >> the VF is a multiple of the non-constant simdlen until we change the
>> >> codegen to
>> >> support those.
>> > 
>> > ISTR BIT_FIELD_REF now uses poly-int offset and size so what breaks
>> > here?  I don't see any other way that such BIT_FIELD_REFs to represent
>> > hi/lo part accesses?
>> > 
>> > Richard.
>> 
>> Upon further investigation, while you are right that BIT_FIELD_REF does
>> use poly-int types for both offsets and sizes, much of the expand code
>> does not know how to deal with variable-len vectors.
>> 
>> One such example of this is in `store_constructor_field' where, after
>> having called our simdclone fn twice passing it the high and low parts
>> of an SVE vec as an argument, we try to concatenate the returned
>> subvectors back together.
>> 
>> In `store_constructor', for example, many updates from integer operators
>> to their poly_int64 counterparts are needed (though this is trivial) and
>> in other parts new logic is needed altogether.
>> 
>> One example of this can be seen in `store_bit_field_1', where you see
>> the following statements:
>> 
>>   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
>>   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
>> 
>> which will ICE for variable-length bitsizes and bitnums and which guard
>> the subsequent section of code from trying to handle non-const lens.
>
> But since BIT_FIELD_REFs with poly-int offset/size are now a thing
> we have to fix all those places.  Or revert back to not allowing
> them.
>
> We at least need to guard the problematic code appropriately.
>
>> Therefore, while I believe that the full support for poly-int
>> BIT_FIELD_REFs is appropriate and something that ought to be done, the
>> 
>>   if (!n->simdclone->simdlen.is_constant () && num_calls != 1)
>> 
>> guard in this patch is, at least for now, strictly necessary and can be
>> readily removed once work on BIT_FIELD_REF is completed.
>
> GCC's history tells us this will never happen ;) Given this is for

Brutal but true :P

> a new feature I insist you try a bit harder and fixup the places that
> ICE (if only by guarding them with .is_constant ()).  Those parts will
> be helpful even if in the end you don't succeed with bug-squashing.

Fair enough. I'm about halfway through fixing the issues we see (full
disclosure, the first half is the easy half).

And about the need to at least guard the problematic code appropriately,
that's what I've done wherever I've not yet developed a satisfactory
solution.  Still, a full fix would be nice.

I look forward to submitting the relevant fixes.

Many thanks,
Victor


> Richard.
>
>> Regards,
>> Victor
>> 
>> >> gcc/ChangeLog:
>> >>
>> >>  * tree-vect-stmts.cc (vectorizable_simd_clone_call): Reject simdclones
>> >>  with non-constant simdlen when VF is not exactly the same.
>> >> ---
>> >>   gcc/tree-vect-stmts.cc | 5 -
>> >>   1 file changed, 4 insertions(+), 1 deletion(-)
>> >>
>> >> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
>> >> index 2d0da6f0a0e..961421fee25 100644
>> >> --- a/gcc/tree-vect-stmts.cc
>> >> +++ b/gcc/tree-vect-stmts.cc
>> >> @@ -4149,7 +4149,10 @@ vectorizable_simd_clone_call (vec_info *vinfo,
>> >> stmt_vec_info stmt_info,
>> >>if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
>> >> &num_calls)
>> >>|| (!n->simdclone->inbranch && (masked_call_offset > 0))
>> >> - || (nargs != simd_nargs))
>> >> + || (nargs != simd_nargs)
>> >> + /* Currently we do not support multiple calls of non-constant
>> >> +simdlen as poly vectors can not be accessed by BIT_FIELD_REF.
>> >> */
>> >> + || (!n->simdclone->simdlen.is_constant () && num_calls != 1))
>> >>  continue;
>> >>if (num_calls != 1)
>> >>  this_badness += floor_log2 (num_calls) * 4096;
>> >>
>> > 
>> 
>> 


[PATCH] v2: Add -f{,no-}assume-sane-operators-new-delete options [PR110137]

2024-11-19 Thread Jakub Jelinek
On Tue, Nov 19, 2024 at 11:23:31AM +0100, Jakub Jelinek wrote:
> On Tue, Nov 19, 2024 at 10:25:16AM +0100, Richard Biener wrote:
> > I think it's pretty clear and easy to describe to users what "m " and 
> > what "mC" do.  But with "pure" this is an odd intermediate state.  For both
> > "m " and "mP" you suggest above the new/delete might modify their
> > global state but as you can't rely on the new/delete pair to prevail
> > you cannot rely on the modification to happen.  But how do you explain
> > that
> 
> If we are willing to make the default not strictly conforming (i.e.
> basically revert PR101480 by default and make the GCC 11.1/11.2 behavior
> the default and allow -fno-sane-operators-new-delete to change to GCC
> 11.3/14.* behavior), I can live with it.
> But we need to make the documentation clear that the default is not strictly
> conforming.

Here is a modified version of the patch to do that.

Or do we want to set the default based on -std= option (-std=gnu* implies
-fassume-sane-operators-new-delete, -std=c++* implies
-fno-assume-sane-operators-new-delete)?  Though, not sure what to do for
LTO then.

2024-11-19  Jakub Jelinek  

PR c++/110137
PR middle-end/101480
gcc/
* doc/invoke.texi (-fassume-sane-operators-new-delete,
-fno-assume-sane-operators-new-delete): Document.
* gimple.cc (gimple_call_fnspec): Handle
-f{,no-}assume-sane-operators-new-delete.
gcc/c-family/
* c.opt (fassume-sane-operators-new-delete): New option.
gcc/testsuite/
* g++.dg/tree-ssa/pr110137-1.C: New test.
* g++.dg/tree-ssa/pr110137-2.C: New test.
* g++.dg/tree-ssa/pr110137-3.C: New test.
* g++.dg/torture/pr10148.C: Add -fno-assume-sane-operators-new-delete
as dg-additional-options.

--- gcc/doc/invoke.texi.jj  2024-11-19 10:23:59.145145887 +0100
+++ gcc/doc/invoke.texi 2024-11-19 12:07:13.942789378 +0100
@@ -213,7 +213,9 @@ in the following sections.
 @item C++ Language Options
 @xref{C++ Dialect Options,,Options Controlling C++ Dialect}.
 @gccoptlist{-fabi-version=@var{n}  -fno-access-control
--faligned-new=@var{n}  -fargs-in-order=@var{n}  -fchar8_t  -fcheck-new
+-faligned-new=@var{n}  -fargs-in-order=@var{n}
+-fno-assume-sane-operators-new-delete
+-fchar8_t  -fcheck-new
 -fconcepts  -fconstexpr-depth=@var{n}  -fconstexpr-cache-depth=@var{n}
 -fconstexpr-loop-limit=@var{n}  -fconstexpr-ops-limit=@var{n}
 -fno-elide-constructors
@@ -3163,6 +3165,35 @@ but few users will need to override the
 
 This flag is enabled by default for @option{-std=c++17}.
 
+@opindex fno-assume-sane-operators-new-delete
+@opindex fassume-sane-operators-new-delete
+@item -fno-assume-sane-operators-new-delete
+The C++ standard allows replacing the global @code{new}, @code{new[]},
+@code{delete} and @code{delete[]} operators, though a lot of C++ programs
+don't replace them and just use the implementation provided version.
+Furthermore, the C++ standard allows omitting those calls if they are
+made from new or delete expressions (and by extension the same is
+assumed if @code{__builtin_operator_new} or @code{__builtin_operator_delete}
+functions are used).
+This option allows control over some optimizations around calls
+to those operators.
+With @code{-fassume-sane-operators-new-delete} option GCC may assume that
+calls to the replaceable global operators from new or delete expressions or
+from @code{__builtin_operator_new} or @code{__builtin_operator_delete} calls
+don't read or modify any global variables or variables whose address could
+escape to the operators (global state; except for @code{errno} for the
+@code{new} and @code{new[]} operators).
+This allows most optimizations across those calls and is something that
+the implementation provided operators satisfy unless @code{malloc}
+implementation details are observable in the code or unless @code{malloc}
+hooks are used, but might not be satisfied if a program replaces those
+operators.  This behavior is enabled by default.
+With @code{-fno-assume-sane-operators-new-delete} option GCC must
+assume all these calls (whether from new or delete expressions or called
+directly) may read and write global state unless proven otherwise (e.g.@:
+when GCC compiles their implementation).  Use this option if those
+operators are or may be replaced and code needs to expect such behavior.
+
 @opindex fchar8_t
 @opindex fno-char8_t
 @item -fchar8_t
--- gcc/gimple.cc.jj2024-11-16 10:22:38.386770817 +0100
+++ gcc/gimple.cc   2024-11-19 11:45:33.386136116 +0100
@@ -1600,12 +1600,22 @@ gimple_call_fnspec (const gcall *stmt)
   && DECL_IS_OPERATOR_DELETE_P (fndecl)
   && DECL_IS_REPLACEABLE_OPERATOR (fndecl)
   && gimple_call_from_new_or_delete (stmt))
-return ". o ";
+{
+  if (flag_assume_sane_operators_new_delete)
+   return ".co ";
+  else
+   return ". o ";
+}
   /* Similarly operator new can be treated as malloc.  */
   if (fndecl
   && DECL_IS_REPLACEABLE_

Re: [PATCH] testsuite: arm: Require 16-bit float support

2024-11-19 Thread Richard Earnshaw (lists)
On 18/11/2024 12:00, Christophe Lyon wrote:
> Hi Torbjörn,
> 
> 
> On 11/18/24 10:37, Torbjorn SVENSSON wrote:
>>
>>
>> On 2024-11-08 20:37, Torbjorn SVENSSON wrote:
>>>
>>>
>>> On 2024-11-08 12:24, Richard Earnshaw (lists) wrote:
 On 05/11/2024 20:06, Torbjörn SVENSSON wrote:
> Based on how these functions are used in test cases, I think it's correct
> to require 16-bit float support in both functions.
>
> Without this change, the checks pass for armv8-m and armv8.1-m, but the
> test cases that use them fail due to the incorrect -mfpu option.
>
> Ok for trunk and releases/gcc-14?

 Can you expand on the issue you're trying to address with this change?
>>>
>>> If dejagnu is started with a specified FPU, the function 
>>> arm_v8_2a_fp16_scalar_ok will check if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 
>>> is defined, but it will not ensure that the FPU supports 16-bit floats.
>>> The result is that with the given FPU, GCC might report that 
>>> __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is supported, but 16-bit floats are 
>>> not.
>>>
>>> With -march and -mfpu:
>>> .../bin/arm-none-eabi-gcc -E -dM - -mthumb -march=armv8-m.main+fp - 
>>> mfloat-abi=hard -mfpu=fpv5-sp-d16 -fdiagnostics-plain-output -O2 - 
>>> mcpu=unset -march=armv8.2-a+fp16  >> __ARM_FEATURE_FP16_SCALAR_ARITHMETIC
>>> #define __ARM_FP 4
>>> #define __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1
>>>
>>>
>>> Same as above, but with -mfpu=auto appended:
>>> .../bin/arm-none-eabi-gcc -E -dM - -mthumb -march=armv8-m.main+fp - 
>>> mfloat-abi=hard -mfpu=fpv5-sp-d16 -fdiagnostics-plain-output -O2 - 
>>> mcpu=unset -march=armv8.2-a+fp16 -mfpu=auto  >> ' -e __ARM_FEATURE_FP16_SCALAR_ARITHMETIC
>>> #define __ARM_FP 14
>>> #define __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1
>>>
>>>
>>> So, adding the __ARM_FP validation ensures that the empty set of flags is 
>>> never accepted for this scenario.
>>>
>>>
>>> For check_effective_target_arm_v8_2a_fp16_neon_ok_nocache, it's the same 
>>> thing but here we also assume that neon is available without checking it.
>>>
>>>
>>> Looking through other failing tests, I also noticed that
>>> check_effective_target_arm_v8_3a_fp16_complex_neon_ok_nocache is essentially
>>> a copy of check_effective_target_arm_v8_2a_fp16_neon_ok_nocache, but with a
>>> different architecture and define, so I'll add a fix for that too.
>>>
>>>
>>> With all this said, I see that there is an error in this patch, so a v2 
>>> will be sent as soon as my current test run completes and there is no 
>>> regression.
>>
>>
>>
>> I've tried to dig a bit deeper into this topic.
>>
>> In gcc.target/arm/armv8_2-fp16-scalar-1.c, we do:
>>
>> /* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok }  */
>>
>> this currently checks if __ARM_FEATURE_FP16_SCALAR_ARITHMETIC is defined.
>>
>>
>> The symbol __ARM_FEATURE_FP16_SCALAR_ARITHMETIC is defined in arm-c.cc:
>>
>>    def_or_undef_macro (pfile, "__ARM_FEATURE_FP16_SCALAR_ARITHMETIC",
>>    TARGET_VFP_FP16INST);
>>
>>
>> The symbol TARGET_VFP_FP16INST is defined in arm.h:
>>
>> /* FPU supports the floating point FP16 instructions for ARMv8.2-A
>>     and later.  */
>> #define TARGET_VFP_FP16INST \
>>    (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP5 && arm_fp16_inst)
>>
>>
>> And arm_fp16_inst is defined in arm.cc:
>>
>> /* Nonzero if this chip supports the FP16 instructions extension of ARM
>>     Architecture 8.2.  */
>> int arm_fp16_inst = 0;
>>
>> and a bit further down:
>>
>>    arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
>>
>>
>> All this suggests that __ARM_FEATURE_FP16_SCALAR_ARITHMETIC should be an 
>> armv8.2+ feature, but isa_bit_fp16 is also set for Cortex-M52, Cortex-M55 
>> and Cortex-M85 among the Cortex-M cpus defined in arm-cpu-cdata.h (generated 
>> from arm-cpus.in).
>>
>> Now to the reason for the failure.
>> In the test case, it includes arm_fp16.h, but arm_fp16.h contains a whole 
>> bunch of functions called __builtin_neon_*, so is this header file really 
>> applicable for Cortex-M?
>> Or should arm_fp16.h be updated to also contain an alternative list of 
>> functions applicable for Cortex-M?
>>
>>
>> The failures I see in my tests are:
>>
>> Executing on host: .../bin/arm-none-eabi-gcc 
>> .../gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-1.c  -mthumb 
>> -march=armv7e-m+fp.dp -mcpu=cortex-m7 -mfloat-abi=hard -mfpu=fpv5-d16 
>> -fdiagnostics-plain-output  -O2 -mcpu=unset -march=armv8.2-a+fp16 
>> -ffat-lto-objects -fno-ident -S -o armv8_2-fp16-scalar-1.s (timeout = 
>> 800)
>> spawn -ignore SIGHUP .../bin/arm-none-eabi-gcc 
>> .../gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-1.c -mthumb 
>> -march=armv7e-m+fp.dp -mcpu=cortex-m7 -mfloat-abi=hard -mfpu=fpv5-d16 
>> -fdiagnostics-plain-output -O2 -mcpu=unset -march=armv8.2-a+fp16 
>> -ffat-lto-objects -fno-ident -S -o armv8_2-fp16-scalar-1.s
>> pid is 17266 -17266
>> In file included from 
>> .../gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-1.c:7:
>> .

Re: [PATCH v1 1/2] RISC-V: Fix incorrect optimization options passing to strided ld/st test

2024-11-19 Thread Jeff Law




On 11/19/24 1:30 AM, pan2...@intel.com wrote:

From: Pan Li 

The testcases of vector strided load/store are designed to pick up
different sorts of optimization options but actually these option
are ignored according to the Execution log of gcc.log.  This patch
would like to make it correct, and then you will see the build option
similar as below from the gcc.log.

Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=scalable 
-mrvv-max-lmul=m1 ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m1 
...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=scalable 
-mrvv-max-lmul=m4 ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=scalable 
-mrvv-max-lmul=m8 ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=zvl 
-mrvv-max-lmul=dynamic ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m8 
...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4 
...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=scalable 
-mrvv-max-lmul=m2 ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=scalable 
-mrvv-max-lmul=dynamic ...
Executing ... strided_ld_st-1-f16.c -O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2 
...

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

It is a test-only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/rvv.exp: Fix the incorrect optimization options.
So if this change is the right one to make for the strided subdirectory, 
then shouldn't it also be correct to make for the gather-scatter 
subdirectory as well?


And similarly for various other instances where we call dg-runtest in 
that file.


Basically I'd like to see some explanation why this is the right patch 
to make and why this case needs to be handled different from every other 
one that I see in that file.  Assuming that explanation makes sense, 
then some kind of comment in this file indicating why this case is 
different seems in order.


jeff


Re: [PATCH] Avoid repeated calls to temporarily_undo_changes [PR117297]

2024-11-19 Thread Richard Biener
On Mon, Nov 18, 2024 at 9:13 PM Richard Sandiford
 wrote:
>
> In an attempt to reduce compile time, rtl-ssa computes the cost
> of existing instructions lazily rather than eagerly.  However,
> this means that it might need to calculate the cost of an existing
> instruction while a change group is already in progress for the
> instruction.  rtl_ssa::insn_info::calculate_cost therefore temporarily
> undoes any in-progress changes in order to get back the original pattern
> and insn code.
>
> rtl-ssa's main use of insn costs is in rtl_ssa::changes_are_worthwhile,
> which calculates the cost of a change involving an arbitrary number
> of instructions.  Summing up the original cost of N instructions
> while those N instructions have in-progress changes could lead to
> O(N*N) rtl changes, since each lazy calculation might have to
> temporarily undo the changes to all N instructions.

Wheee ...

>
> We can avoid that by converting the current temporarily_undo_changes/
> redo_changes pair into an RAII class and extending it to allow
> nested uses.  rtl_ssa::changes_are_worthwhile can then undo the
> in-progress changes once, before computing the original cost of all
> the instructions.
>
> Tested on aarch64-linux-gnu.  Also tested against the testcase in
> the PR, where the old compile time was 3.7x greater than the new compile
> time (tested with a stage 3 --enable-checking=yes,extra,rtl compiler).
> late-combine went from being 73% of compile time to less than 1%
> (rounded to 0% by -fmem-report).  The main time sinks now seem to be
> DOM and FRE.
>
> OK to install?

OK.

Thanks,
Richard.

> Richard
>
>
> gcc/
> PR rtl-optimization/117297
> * recog.h (temporarily_undo_changes, redo_changes): Delete in
> favor of...
> (undo_recog_changes): ...this new RAII class.
> * fwprop.cc (should_replace_address): Update accordingly.
> (fwprop_propagation::check_mem): Likewise.
> (try_fwprop_subst_note): Likewise.
> (try_fwprop_subst_pattern): Likewise.
> * rtl-ssa/insns.cc (insn_info::calculate_cost): Likewise.
> * rtl-ssa/changes.cc (rtl_ssa::changes_are_worthwhile): Temporarily
> undo all in-progress changes while computing the cost of the original
> sequence.
> * recog.cc (temporarily_undone_changes): Replace with...
> (undo_recog_changes::s_num_changes): ...this static member variable.
> (validate_change_1): Update check accordingly.
> (confirm_change_group): Likewise.
> (num_validated_changes): Likewise.
> (temporarily_undo_changes): Replace with...
> (undo_recog_changes::undo_recog_changes): ...this constructor.
> (redo_changes): Replace with...
> (undo_recog_changes::~undo_recog_changes): ...this destructor.
> ---
>  gcc/fwprop.cc  | 30 --
>  gcc/recog.cc   | 39 +--
>  gcc/recog.h| 31 +--
>  gcc/rtl-ssa/changes.cc | 26 +-
>  gcc/rtl-ssa/insns.cc   |  3 +--
>  5 files changed, 76 insertions(+), 53 deletions(-)
>
> diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
> index 8cba6b7ce9f..5ddefdf6d2f 100644
> --- a/gcc/fwprop.cc
> +++ b/gcc/fwprop.cc
> @@ -146,10 +146,11 @@ should_replace_address (int old_num_changes, rtx mem, 
> rtx_insn *insn)
>
>/* Prefer the new address if it is less expensive.  */
>bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
> -  temporarily_undo_changes (old_num_changes);
> -  gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
> -  MEM_ADDR_SPACE (mem), speed);
> -  redo_changes (old_num_changes);
> +  {
> +undo_recog_changes undo (old_num_changes);
> +gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
> +MEM_ADDR_SPACE (mem), speed);
> +  }
>gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
> MEM_ADDR_SPACE (mem), speed);
>
> @@ -160,9 +161,8 @@ should_replace_address (int old_num_changes, rtx mem, 
> rtx_insn *insn)
>if (gain == 0)
>  {
>gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
> -  temporarily_undo_changes (old_num_changes);
> +  undo_recog_changes undo (old_num_changes);
>gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
> -  redo_changes (old_num_changes);
>  }
>
>return (gain > 0);
> @@ -220,9 +220,11 @@ fwprop_propagation::check_mem (int old_num_changes, rtx 
> mem)
>return false;
>  }
>
> -  temporarily_undo_changes (old_num_changes);
> -  bool can_simplify = can_simplify_addr (XEXP (mem, 0));
> -  redo_changes (old_num_changes);
> +  bool can_simplify = [&]()
> +{
> +  undo_recog_changes undo (old_num_changes);
> +  return can_simplify_addr (XEXP (mem, 0));
> +} ();
>if (!can_simplify)
>  {
>failure_reason = "would replace a frame address";
> @@ -414,9 +416,10 @@ try_fwprop_subst

Re: [PATCH 05/17] testsuite: arm: Use effective-target for small-multiply-m* tests

2024-11-19 Thread Richard Earnshaw (lists)
On 19/11/2024 10:23, Torbjörn SVENSSON wrote:
> Update test cases to use -mcpu=unset/-march=unset feature introduced in
> r15-3606-g7d6c6a0d15c.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/small-multiply-m0-1.c: Use effective-target
>   arm_arch_v6m and added option "-march=unset".
>   * gcc.target/arm/small-multiply-m0-2.c: Likewise.
>   * gcc.target/arm/small-multiply-m0-3.c: Likewise.
>   * gcc.target/arm/small-multiply-m0plus-1.c: Likewise.
>   * gcc.target/arm/small-multiply-m0plus-2.c: Likewise.
>   * gcc.target/arm/small-multiply-m0plus-3.c: Likewise.
>   * gcc.target/arm/small-multiply-m1-1.c: Likewise.
>   * gcc.target/arm/small-multiply-m1-2.c: Likewise.
>   * gcc.target/arm/small-multiply-m1-3.c: Likewise.
> 
> Signed-off-by: Torbjörn SVENSSON 
> ---
>  gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c | 4 ++--
>  gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c | 4 ++--
>  gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c | 4 ++--
>  gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c | 4 ++--
>  gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c | 4 ++--
>  gcc/testsuite/gcc.target/arm/small-multiply-m0plus-3.c | 4 ++--
>  gcc/testsuite/gcc.target/arm/small-multiply-m1-1.c | 4 ++--
>  gcc/testsuite/gcc.target/arm/small-multiply-m1-2.c | 4 ++--
>  gcc/testsuite/gcc.target/arm/small-multiply-m1-3.c | 4 ++--
>  9 files changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c 
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
> index 52c652c1cba..c62e2f97ade 100644
> --- a/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target arm_arch_v6m_ok } */
>  /* { dg-require-effective-target arm_thumb1_ok } */
> -/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { 
> "-mcpu=cortex-m0.small-multiply" } } */
> -/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -O2" } */
> +/* { dg-options "-march=unset -mcpu=cortex-m0.small-multiply -mthumb -O2" } 
> */

All of the checks in this patch set should use dg-r-e-t arm_cpu_<cpu>_ok, with 
new core entries added to the table in target-supports.exp (search for 
xscale_arm).  You can then write

dg-do compile
dg-r-e-t arm_cpu_cortex_m0small_ok
dg-options "-O2"
dg-add-options arm_cpu_cortex_m0small

etc and there's no need to check for thumb1.

OK with those changes.

R.

>  
>  int
>  test (int a)
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c 
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
> index 10d49e9eace..a9e076b0e60 100644
> --- a/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target arm_arch_v6m_ok } */
>  /* { dg-require-effective-target arm_thumb1_ok } */
> -/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { 
> "-mcpu=cortex-m0.small-multiply" } } */
> -/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -Os" } */
> +/* { dg-options "-march=unset -mcpu=cortex-m0.small-multiply -mthumb -Os" } 
> */
>  
>  int
>  test (int a)
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c 
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
> index b4af511af86..973c78aee37 100644
> --- a/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target arm_arch_v6m_ok } */
>  /* { dg-require-effective-target arm_thumb1_ok } */
> -/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { 
> "-mcpu=cortex-m0.small-multiply" } } */
> -/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -Os" } */
> +/* { dg-options "-march=unset -mcpu=cortex-m0.small-multiply -mthumb -Os" } 
> */
>  
>  int
>  test (int a)
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c 
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
> index 59dba7cf4ab..53e68ae1364 100644
> --- a/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target arm_arch_v6m_ok } */
>  /* { dg-require-effective-target arm_thumb1_ok } */
> -/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { 
> "-mcpu=cortex-m0plus.small-multiply" } } */
> -/* { dg-options "-mcpu=cortex-m0plus.small-multiply -mthumb -O2" } */
> +/* { dg-options "-march=unset -mcpu=cortex-m0plus.small-multiply -mthumb 
> -O2" } */
>  
>  int
>  test (int a)
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c 
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c
> index 685ef440776..19f941dd1b7 1006

Re: [PATCH 06/17] testsuite: arm: Use effective-target for thumb2-slow-flash-data* tests

2024-11-19 Thread Richard Earnshaw (lists)
On 19/11/2024 10:23, Torbjörn SVENSSON wrote:
> Update test cases to use -mcpu=unset/-march=unset feature introduced in
> r15-3606-g7d6c6a0d15c.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/thumb2-slow-flash-data-2.c: Use
>   effective-target arm_arch_v7em and added option "-march=unset
>   -mfpu=auto".
>   * gcc.target/arm/thumb2-slow-flash-data-3.c: Likewise.
>   * gcc.target/arm/thumb2-slow-flash-data-4.c: Likewise.
>   * gcc.target/arm/thumb2-slow-flash-data-5.c: Likewise.
> 
> Signed-off-by: Torbjörn SVENSSON 
> ---
>  gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c | 7 +++
>  gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c | 7 +++
>  gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c | 7 +++
>  gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-5.c | 7 +++
>  4 files changed, 12 insertions(+), 16 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c 
> b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c
> index 231243759cf..581e510762a 100644
> --- a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c
> +++ b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c
> @@ -1,11 +1,10 @@
>  /* { dg-do compile } */
> -/* { dg-require-effective-target arm_cortex_m } */
>  /* { dg-require-effective-target arm_thumb2_ok } */
>  /* { dg-require-effective-target arm_fp_ok } */
> -/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { 
> "-mcpu=*" } { "-mcpu=cortex-m4" "-mcpu=cortex-m7" } } */
> -/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } 
> { "-mfloat-abi=hard" } } */
> +/* { dg-require-effective-target arm_arch_v7em_ok } */
>  /* { dg-skip-if "-mslow-flash-data and -mword-relocations incompatible" { 
> *-*-* } { "-mword-relocations" } } */
> -/* { dg-options "-march=armv7e-m+fp -mfloat-abi=hard -O2 -mthumb 
> -mslow-flash-data" } */
> +/* { dg-options "-mfloat-abi=hard -mfpu=auto -O2 -mslow-flash-data" } */
> +/* { dg-add-options arm_arch_v7em } */

All of these should be able to use arm_arch_v7e_hard, as suggested on an 
earlier patch on this series, then
 /* { dg-require-effective-target arm_thumb2_ok } */
 /* { dg-require-effective-target arm_fp_ok } */
should both be unnecessary, as we're only generating assembly output and we've 
forced the architecture.  Your dg-options then becomes just "-O2 
-mslow-flash-data".

OK with that change.

R.

>  
>  float f (float);
>  
> diff --git a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c 
> b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c
> index 27e72ec2086..b5f4c0d7d6e 100644
> --- a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c
> +++ b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c
> @@ -1,11 +1,10 @@
>  /* { dg-do compile } */
> -/* { dg-require-effective-target arm_cortex_m } */
>  /* { dg-require-effective-target arm_thumb2_ok } */
>  /* { dg-require-effective-target arm_fp_ok } */
> -/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { 
> "-mcpu=*" } { "-mcpu=cortex-m4" "-mcpu=cortex-m7" } } */
> -/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } 
> { "-mfloat-abi=hard" } } */
> +/* { dg-require-effective-target arm_arch_v7em_ok } */
>  /* { dg-skip-if "-mslow-flash-data and -mword-relocations incompatible" { 
> *-*-* } { "-mword-relocations" } } */
> -/* { dg-options "-march=armv7e-m+fp -mfloat-abi=hard -mthumb 
> -mslow-flash-data" } */
> +/* { dg-options "-mfloat-abi=hard -mfpu=auto -mslow-flash-data" } */
> +/* { dg-add-options arm_arch_v7em } */
>  
>  /* From PR71607 */
>  
> diff --git a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c 
> b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c
> index 8dbe87a1e68..a4d3846996e 100644
> --- a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c
> +++ b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c
> @@ -1,11 +1,10 @@
>  /* { dg-do compile } */
> -/* { dg-require-effective-target arm_cortex_m } */
>  /* { dg-require-effective-target arm_thumb2_ok } */
>  /* { dg-require-effective-target arm_fp_ok } */
> -/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { 
> "-mcpu=*" } { "-mcpu=cortex-m4" "-mcpu=cortex-m7" } } */
> -/* { dg-skip-if "do not override -mfloat-abi" { *-*-* } { "-mfloat-abi=*" } 
> { "-mfloat-abi=hard" } } */
> +/* { dg-require-effective-target arm_arch_v7em_ok } */
>  /* { dg-skip-if "-mslow-flash-data and -mword-relocations incompatible" { 
> *-*-* } { "-mword-relocations" } } */
> -/* { dg-options "-march=armv7e-m+fp -mfloat-abi=hard -O2 -mthumb 
> -mslow-flash-data" } */
> +/* { dg-options "-mfloat-abi=hard -mfpu=auto -O2 -mslow-flash-data" } */
> +/* { dg-add-options arm_arch_v7em } */
>  
>  double __attribute__ ((target ("fpu=fpv5-d16")))
>  foo (void)
> diff --git a/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-5.c 
> b/gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-5.c
> index b98eb7624e4..0fcfb65c5

Re: [PATCH 15/17] testsuite: arm: Use -mcpu=unset when overriding -march

2024-11-19 Thread Richard Earnshaw (lists)
On 19/11/2024 10:24, Torbjörn SVENSSON wrote:
> Update test cases to use -mcpu=unset/-march=unset feature introduced in
> r15-3606-g7d6c6a0d15c.
> 
> gcc/testsuite/ChangeLog:
>   * gcc.dg/pr41574.c: Added option "-mcpu=unset".
>   * gcc.dg/pr59418.c: Likewise.
>   * lib/target-supports.exp (add_options_for_vect_early_break):
>   Likewise.
>   (add_options_for_arm_v8_neon): Likewise.
>   (check_effective_target_arm_neon_ok_nocache): Likewise.
>   (check_effective_target_arm_simd32_ok_nocache): Likewise.
>   (check_effective_target_arm_sat_ok_nocache): Likewise.
>   (check_effective_target_arm_dsp_ok_nocache): Likewise.
>   (check_effective_target_arm_crc_ok_nocache): Likewise.
>   (add_options_for_aarch64_sve): Likewise.

No, this is an aarch64 target architecture option, not an arm target 
architecture option.

>   (check_effective_target_arm_v8_neon_ok_nocache): Likewise.
>   (check_effective_target_aarch64_fjcvtzs_hw): Likewise.
>   (check_effective_target_arm_v8_1m_mve_fp_ok_nocache): Likewise.
>   (check_effective_target_arm_v8_1a_neon_ok_nocache): Likewise.
>   (check_effective_target_arm_v8_2a_fp16_scalar_ok_nocache):
>   Likewise.
>   (check_effective_target_arm_v8_2a_fp16_neon_ok_nocache):
>   Likewise.
>   (check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache):
>   Likewise.
>   (check_effective_target_arm_v8_1m_mve_ok_nocache): Likewise.
>   (check_effective_target_arm_v8_2a_i8mm_ok_nocache): Likewise.
>   (check_effective_target_arm_fp16fml_neon_ok_nocache): Likewise.
>   (check_effective_target_arm_v8_2a_bf16_neon_ok_nocache):
>   Likewise.
>   (check_effective_target_arm_v8m_main_cde_ok_nocache): Likewise.
>   (check_effective_target_arm_v8m_main_cde_fp_ok_nocache):
>   Likewise.
>   (check_effective_target_arm_v8_1m_main_cde_mve_ok_nocache):
>   Likewise.
>   (check_effective_target_arm_v8_1m_main_cde_mve_fp_ok_nocache):
>   Likewise.

as are the following...

>   (check_effective_target_aarch64_asm_fp_ok): Likewise.
>   (check_effective_target_aarch64_asm_simd_ok): Likewise.
>   (check_effective_target_aarch64_asm_crypto_ok): Likewise.
>   (check_effective_target_aarch64_asm_crc_ok): Likewise.
>   (check_effective_target_aarch64_asm_lse_ok): Likewise.
>   (check_effective_target_aarch64_asm_dotprod_ok): Likewise.
>   (check_effective_target_aarch64_asm_sve_ok): Likewise.
>   (check_effective_target_aarch64_asm_i8mm_ok): Likewise.
>   (check_effective_target_aarch64_asm_f32mm_ok): Likewise.
>   (check_effective_target_aarch64_asm_f64mm_ok): Likewise.
>   (check_effective_target_aarch64_asm_bf16_ok): Likewise.
>   (check_effective_target_aarch64_asm_sb_ok): Likewise.
>   (check_effective_target_aarch64_asm_sve2_ok): Likewise.
>   (check_effective_target_aarch64_asm_ls64_ok): Likewise.
>   (check_effective_target_aarch64_asm_sme_ok): Likewise.
>   (check_effective_target_aarch64_asm_sme-i16i64_ok): Likewise.
>   (check_effective_target_aarch64_asm_sme2_ok): Likewise.

... to here.

You'll need to back out these bits.

R.

>   (check_effective_target_arm_v8_3a_complex_neon_ok_nocache):
>   Likewise.
>   (check_effective_target_arm_v8_3a_fp16_complex_neon_ok_nocache):
>   Likewise.
>   (check_effective_target_arm_v8_1_lob_ok): Likewise.
> ---
>  gcc/testsuite/gcc.dg/pr41574.c|  2 +-
>  gcc/testsuite/gcc.dg/pr59418.c|  2 +-
>  gcc/testsuite/lib/target-supports.exp | 66 +--
>  3 files changed, 35 insertions(+), 35 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/pr41574.c b/gcc/testsuite/gcc.dg/pr41574.c
> index 062c0044532..e25295bc4fd 100644
> --- a/gcc/testsuite/gcc.dg/pr41574.c
> +++ b/gcc/testsuite/gcc.dg/pr41574.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=armv7-a -mfloat-abi=softfp -mfpu=neon 
> -fno-unsafe-math-optimizations -fdump-rtl-combine" { target { arm*-*-* } } } 
> */
> +/* { dg-options "-O2 -mcpu=unset -march=armv7-a -mfloat-abi=softfp 
> -mfpu=neon -fno-unsafe-math-optimizations -fdump-rtl-combine" { target { 
> arm*-*-* } } } */
>  /* { dg-options "-O2 -fno-unsafe-math-optimizations -fdump-rtl-combine" { 
> target { ! arm*-*-* } } } */
>  
>  
> diff --git a/gcc/testsuite/gcc.dg/pr59418.c b/gcc/testsuite/gcc.dg/pr59418.c
> index 4b54ef2b42d..6ab46ecde8a 100644
> --- a/gcc/testsuite/gcc.dg/pr59418.c
> +++ b/gcc/testsuite/gcc.dg/pr59418.c
> @@ -3,7 +3,7 @@
>  
>  /* { dg-do compile } */
>  /* { dg-options "-Os -g" } */
> -/* { dg-options "-march=armv7-a+fp -mfloat-abi=hard -Os -g" { target { 
> arm*-*-* && { ! arm_thumb1 } } } } */
> +/* { dg-options "-mcpu=unset -march=armv7-a+fp -mfloat-abi=hard -Os -g" { 
> target { arm*-*-* && { ! arm_thumb1 } } } } */
>  
>  extern int printf (const char *__format, ...);
>  double bar (const char *, int);
> diff --git a/gcc/testsuite/lib/target-supp

Re: [PATCH 17/17] testsuite: arm: Use effective-target for pr96939 test

2024-11-19 Thread Richard Earnshaw (lists)
On 19/11/2024 10:24, Torbjörn SVENSSON wrote:
> Update test case to use -mcpu=unset/-march=unset feature introduced in
> r15-3606-g7d6c6a0d15c.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/lto/pr96939_0.c: Use effective-target
>   arm_arch_v8a.
>   * gcc.target/arm/lto/pr96939_1.c: Remove dg-options.
> 
> Signed-off-by: Torbjörn SVENSSON 
> ---
>  gcc/testsuite/gcc.target/arm/lto/pr96939_0.c | 4 ++--
>  gcc/testsuite/gcc.target/arm/lto/pr96939_1.c | 1 -
>  2 files changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/lto/pr96939_0.c 
> b/gcc/testsuite/gcc.target/arm/lto/pr96939_0.c
> index 241ffd5da0a..3bb74bd1a1d 100644
> --- a/gcc/testsuite/gcc.target/arm/lto/pr96939_0.c
> +++ b/gcc/testsuite/gcc.target/arm/lto/pr96939_0.c
> @@ -1,7 +1,7 @@
>  /* PR target/96939 */
>  /* { dg-lto-do link } */
> -/* { dg-require-effective-target arm_arch_v8a_ok } */
> -/* { dg-lto-options { { -flto -O2 } } } */
> +/* { dg-require-effective-target arm_arch_v8a_link } */
> +/* { dg-lto-options { { -flto -O2 -mcpu=unset -march=armv8-a+simd+crc } } } 
> */
>  
>  extern unsigned crc (unsigned, const void *);
>  typedef unsigned (*fnptr) (unsigned, const void *);
> diff --git a/gcc/testsuite/gcc.target/arm/lto/pr96939_1.c 
> b/gcc/testsuite/gcc.target/arm/lto/pr96939_1.c
> index 4afdbdaf5ad..c641b5580ab 100644
> --- a/gcc/testsuite/gcc.target/arm/lto/pr96939_1.c
> +++ b/gcc/testsuite/gcc.target/arm/lto/pr96939_1.c
> @@ -1,5 +1,4 @@
>  /* PR target/96939 */
> -/* { dg-options "-march=armv8-a+simd+crc" } */
>  
>  #include 
>  

I'm not sure this is right.  The PR talks about handling streaming in of 
objects built with different options, which are supposed to be recorded in the 
streaming data.  But your change alters what will be recorded AFAICT.

R.


[committed] c: Do not register nullptr_t built-in type [PR114869]

2024-11-19 Thread Joseph Myers
As reported in bug 114869, the C front end wrongly creates nullptr_t
as a built-in typedef; it should only be defined in <stddef.h>.  While
the type node needs a name for debug info generation, it doesn't need
to be a valid identifier; use typeof (nullptr) instead, similar to how
the C++ front end uses decltype(nullptr) for this purpose.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

PR c/114869

gcc/c/
* c-decl.cc (c_init_decl_processing): Register nullptr_type_node
as typeof (nullptr) not nullptr_t.

gcc/testsuite/
* gcc.dg/c23-nullptr-5.c: Use typeof (nullptr) not nullptr_t.
* gcc.dg/c11-nullptr-2.c, gcc.dg/c11-nullptr-3.c,
gcc.dg/c23-nullptr-7.c: New tests

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 1128d72ccb03..96bfe9290fd9 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -4799,7 +4799,7 @@ c_init_decl_processing (void)
boolean_type_node));
 
   /* C-specific nullptr initialization.  */
-  record_builtin_type (RID_MAX, "nullptr_t", nullptr_type_node);
+  record_builtin_type (RID_MAX, "typeof (nullptr)", nullptr_type_node);
   /* The size and alignment of nullptr_t is the same as for a pointer to
  character type.  */
   SET_TYPE_ALIGN (nullptr_type_node, GET_MODE_ALIGNMENT (ptr_mode));
diff --git a/gcc/testsuite/gcc.dg/c11-nullptr-2.c 
b/gcc/testsuite/gcc.dg/c11-nullptr-2.c
new file mode 100644
index ..3b37b11ed986
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c11-nullptr-2.c
@@ -0,0 +1,5 @@
+/* Test there is no nullptr_t built-in typedef.  Bug 114869.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c11 -pedantic-errors" } */
+
+int nullptr_t;
diff --git a/gcc/testsuite/gcc.dg/c11-nullptr-3.c 
b/gcc/testsuite/gcc.dg/c11-nullptr-3.c
new file mode 100644
index ..5abb082043cc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c11-nullptr-3.c
@@ -0,0 +1,7 @@
+/* Test there is no nullptr_t in <stddef.h> for C11.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c11 -pedantic-errors" } */
+
+#include <stddef.h>
+
+int nullptr_t;
diff --git a/gcc/testsuite/gcc.dg/c23-nullptr-5.c 
b/gcc/testsuite/gcc.dg/c23-nullptr-5.c
index 54266af70fb3..f708ec2cfc08 100644
--- a/gcc/testsuite/gcc.dg/c23-nullptr-5.c
+++ b/gcc/testsuite/gcc.dg/c23-nullptr-5.c
@@ -3,7 +3,7 @@
 /* { dg-options "-std=c23 -pedantic-errors" } */
 
 int i;
-nullptr_t fn () { ++i; return nullptr; }
+typeof (nullptr) fn () { ++i; return nullptr; }
 
 int
 main ()
diff --git a/gcc/testsuite/gcc.dg/c23-nullptr-7.c 
b/gcc/testsuite/gcc.dg/c23-nullptr-7.c
new file mode 100644
index ..2692e30ea2ad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c23-nullptr-7.c
@@ -0,0 +1,5 @@
+/* Test there is no nullptr_t built-in typedef.  Bug 114869.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c23 -pedantic-errors" } */
+
+int nullptr_t;

-- 
Joseph S. Myers
josmy...@redhat.com



Re: Should -fsanitize=bounds support counted-by attribute for pointers inside a structure?

2024-11-19 Thread Martin Uecker
Am Montag, dem 18.11.2024 um 21:31 + schrieb Qing Zhao:
> 
> > On Nov 18, 2024, at 13:10, Martin Uecker  wrote:
> 
...
> So, I guess that the more accurate question is, for the following:
> 
> struct annotated {
>   int b;
>   int *c __attribute__ ((counted_by (b)));
> } *p_array_annotated;
> 
>   p_array_annotated->c[10] = 2;
> 
> 
> Should we treat the reference “p_array_annotated->c[10]” as 
> an array reference if the pointer field “c” in the “struct annotated” 
> has the counted_by attribute? 

Assuming UBSan is the way to go, then yes and I agree
that after casting to another type this should not be
done anymore.

> 
> > 
> > I am a bit frustrated about the sanitizer.  On the
> > one hand, it is not doing enough to get spatial memory
> > safety even where this would be easily possible, on the
> > other hand, is pedantic about things which are technically
> > UB but not problematic and then one is prevented from
> > using it. 
> 
> Yes, In order to make sanitizer better, both the above issues need to be 
> addressed. 
> > 
> > When used in default mode, where execution continues, it
> > also does not mix well with many warnings, creates more code,
> > and pulls in a library dependency (and the library also depends
> > on upstream choices / progress which seems a limitation for
> > extensions).
> 
> Right, all these are existing issues with the current sanitizer. 
> > 
> > What IMHO would be ideal is a protection mode for spatial
> > memory safety that simply adds traps (which then requires
> > no library, has no issues with other warnings, and could
> > evolve independently from clang)
> > 
> > So shouldn't we just add a -fboundscheck (which would 
> > be like -fsanitize=bounds -fsanitize-trap=bounds just with
> > more checking) and make it really good? I think many people
> > would be very happy about this.
> 
> Then why not just fix the known issues in the current
> -fsanitize=bounds -fsanitize-trap=bounds to make it better?
> What’s the major benefit to add another new option? 

The question is how to fix this?  

At the moment the sanitizer is tied to a shared C++ library
maintained elsewhere (I believe) with a design that ties
every specific case to a specific entry point in this library.

So the UBsan handlers become part of an ABI that needs to
be maintained and upgraded.  Also you need to reimplement
this when using it somewhere where you can't have a C++ library.
(I assume kernels or embedded platforms all have their
own implementations).  If we add something, everything
needs to be upgraded.  For 'counted_by' and 'bounds' you
may get away with the existing message.


Martin

> 
> Thanks.
> 
> Qing
> > 
> > Martin
> > 
> > 
> > > 
> > > For the following small example:
> > > 
> > > #include <stdlib.h>
> > > 
> > > struct annotated {
> > >  int b;
> > >  int *c __attribute__ ((counted_by (b)));
> > > } *p_array_annotated;
> > > 
> > > void __attribute__((__noinline__)) setup (int annotated_count)
> > > {
> > >  p_array_annotated
> > >= (struct annotated *)malloc (sizeof (struct annotated));
> > >  p_array_annotated->c = (int *) malloc (annotated_count *  sizeof (int));
> > >  p_array_annotated->b = annotated_count;
> > > 
> > >  return;
> > > }
> > > 
> > > int main(int argc, char *argv[])
> > > {
> > >  setup (10);
> > >  p_array_annotated->c[11] = 2;
> > >  return 0;
> > > }
> > > 
> > > Should ubsan add instrumentation to the above reference 
> > > p_array_annotated->c[11] inside routine “main”?
> > > 
> > > From my understanding, ubsan does not add bound checking for any pointer 
> > > reference now, however, when the “counted_by” attribute is attached to a 
> > > pointer field inside a structure, the “bound” information for this 
> > > pointer is known, should we enhance the ubsan to instrument such 
> > > reference? 
> > > 
> > > If Yes, then should we add the following limitation to the end user:
> > > 
> > >  When the counted_by attribute is attached to a pointer field, the 
> > > -fsanitize=bounds only works for such a reference when the pointer is NOT 
> > > cast to a type other than the original target type?
> > > 
> > > Thanks for any comments and suggestions.
> > > 
> > > Qing
> > 
> 



Re: [PATCH 1/2] asan: Support dynamic shadow offset

2024-11-19 Thread Jeff Law




On 11/14/24 9:14 PM, Kito Cheng wrote:

AddressSanitizer has supported dynamic shadow offsets since 2016[1], but
GCC hasn't implemented this yet because targets using dynamic shadow
offsets, such as Fuchsia and iOS, are mostly unsupported in GCC.

However, RISC-V 64 switched to dynamic shadow offsets this year[2] because
virtual memory space support varies across different RISC-V cores, such as
Sv39, Sv48, and Sv57. We realized that the best way to handle this
situation is by using a dynamic shadow offset to obtain the offset at
runtime.

We introduce a new target hook, TARGET_ASAN_DYNAMIC_SHADOW_OFFSET_P, to
determine if the target is using a dynamic shadow offset, so this change
won't affect the static offset path. Additionally, TARGET_ASAN_SHADOW_OFFSET
continues to work even if TARGET_ASAN_DYNAMIC_SHADOW_OFFSET_P is non-zero,
ensuring that KASAN functions as expected.

This patch set has been verified on the Banana Pi F3, currently one of the
most popular RISC-V development boards. All AddressSanitizer-related tests
passed without introducing new regressions.

It was also verified on AArch64 and x86_64 with no regressions in
AddressSanitizer.

[1] 
https://github.com/llvm/llvm-project/commit/130a190bf08a3d955d9db24dac936159dc049e12
[2] 
https://github.com/llvm/llvm-project/commit/da0c8b275564f814a53a5c19497669ae2d99538d
---
  gcc/asan.cc   | 80 ---
  gcc/asan.h|  3 ++
  gcc/config/riscv/riscv.cc |  3 ++
  gcc/doc/tm.texi   |  6 ++-
  gcc/doc/tm.texi.in|  2 +
  gcc/sanopt.cc |  4 ++
  gcc/target.def|  8 +++-
  gcc/toplev.cc |  3 +-
  8 files changed, 101 insertions(+), 8 deletions(-)

Needs a ChangeLog.  New functions should have function comments.

OK with those changes if nobody has objected in 48hrs.

Thanks,
jeff


Re: [PATCH 04/17] testsuite: arm: Use effective-target for pure-code/* tests

2024-11-19 Thread Richard Earnshaw (lists)
On 19/11/2024 10:23, Torbjörn SVENSSON wrote:
> Update test cases to use -mcpu=unset/-march=unset feature introduced in
> r15-3606-g7d6c6a0d15c.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/pure-code/no-literal-pool-m0.c: Use
>   effective-target arm_cpu_cortex-m0.
>   * gcc.target/arm/pure-code/no-literal-pool-m23.c: Use
>   effective-target arm_cpu_cortex-m23.
>   * gcc.target/arm/pure-code/pr109800.c: Use effective-target
>   arm_arch_v7m and added option "-mcpu=unset".
>   * target-supports.exp: Define effective-target
>   arm_cpu_cortex_m0 and arm_cpu_cortex_m23.
> 
> Signed-off-by: Torbjörn SVENSSON 

See comments on individual tests.

> ---
>  gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c  | 5 +++--
>  gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m23.c | 5 +++--
>  gcc/testsuite/gcc.target/arm/pure-code/pr109800.c| 3 ++-
>  gcc/testsuite/gcc.target/arm/pure-code/pr94538-1.c   | 5 +++--
>  gcc/testsuite/lib/target-supports.exp| 2 ++
>  5 files changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c 
> b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
> index bd6f4af183b..5bdbebb1a53 100644
> --- a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
> +++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
> -/* { dg-skip-if "skip override" { *-*-* } { "-mfloat-abi=hard" } { "" } } */
> -/* { dg-options "-mpure-code -mcpu=cortex-m0 -march=armv6s-m -mthumb 
> -mfloat-abi=soft" } */
> +/* { dg-require-effective-target arm_cpu_cortex_m0_ok } */
> +/* { dg-options "-mpure-code" } */
> +/* { dg-add-options arm_cpu_cortex_m0 }*/
>  /* { dg-final { check-function-bodies "**" "" } } */

OK

>  
>  /* Does not use thumb1_gen_const_int.
> diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m23.c 
> b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m23.c
> index 95370126ce8..80a6b51138b 100644
> --- a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m23.c
> +++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m23.c
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
> -/* { dg-skip-if "skip override" { *-*-* } { "-mfloat-abi=hard" } { "" } } */
> -/* { dg-options "-mpure-code -mcpu=cortex-m23 -march=armv8-m.base -mthumb 
> -mfloat-abi=soft" } */
> +/* { dg-require-effective-target arm_cpu_cortex_m23_ok } */
> +/* { dg-options "-mpure-code" } */
> +/* { dg-add-options arm_cpu_cortex_m23 } */
>  /* { dg-final { check-function-bodies "**" "" } } */

OK

>  
>  /*
> diff --git a/gcc/testsuite/gcc.target/arm/pure-code/pr109800.c 
> b/gcc/testsuite/gcc.target/arm/pure-code/pr109800.c
> index d797b790232..ace37cd6bc9 100644
> --- a/gcc/testsuite/gcc.target/arm/pure-code/pr109800.c
> +++ b/gcc/testsuite/gcc.target/arm/pure-code/pr109800.c
> @@ -1,4 +1,5 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target arm_arch_v7m_link } */
Why do we need 'link' here, when the dg-do is 'compile'?

>  /* { dg-require-effective-target arm_hard_ok } */
> -/* { dg-options "-O2 -march=armv7-m -mfloat-abi=hard -mfpu=fpv4-sp-d16 
> -mbig-endian -mpure-code" } */
> +/* { dg-options "-O2 -mcpu=unset -march=armv7-m -mfloat-abi=hard 
> -mfpu=fpv4-sp-d16 -mbig-endian -mpure-code" } */

Hmm, this architecture doesn't have an FPU, though armv7e-m does (and it's 
equivalent to the fpv4-sp-d16).  So I think we want another entry in 
target-supports for this: v7em_hard (that uses armv7e-m+fp and -mfpu=auto).  
Then the rules here collapse to

dg-do compile
dg-r-e-t arm_arch_v7em_hard_ok
dg-options "-O2 -mbig-endian -mpure-code"
dg-add-options arm_arch_v7em_hard



>  double f() { return 5.0; }
> diff --git a/gcc/testsuite/gcc.target/arm/pure-code/pr94538-1.c 
> b/gcc/testsuite/gcc.target/arm/pure-code/pr94538-1.c
> index 31061d5d445..68c223fbd15 100644
> --- a/gcc/testsuite/gcc.target/arm/pure-code/pr94538-1.c
> +++ b/gcc/testsuite/gcc.target/arm/pure-code/pr94538-1.c
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
> -/* { dg-skip-if "skip override" { *-*-* } { "-mfloat-abi=hard" } { "" } } */
> -/* { dg-options "-mpure-code -mcpu=cortex-m23 -march=armv8-m.base -mthumb 
> -mfloat-abi=soft" } */
> +/* { dg-require-effective-target arm_cpu_cortex_m23_ok } */
> +/* { dg-options "-mpure-code" } */
> +/* { dg-add-options arm_cpu_cortex_m23 } */
>  

OK

>  typedef int __attribute__ ((__vector_size__ (16))) V;
>  
> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index 01ed55ed82f..d973b1863bd 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -5848,6 +5848,8 @@ foreach { armfunc armflag armdefs } {
>  foreach { armfunc armflag armdefs } {
>   xscale_arm "-mcpu=xscale -mfloat-abi=soft -marm" "__XSCALE__ && 
> !__thumb__"
>   cortex_a57 "-mcpu=cortex-a57" __A

Re: [PATCH 12/17] testsuite: arm: Use -march=unset for bfloat16_scalar* tests

2024-11-19 Thread Richard Earnshaw (lists)
On 19/11/2024 10:24, Torbjörn SVENSSON wrote:
> Update test cases to use -mcpu=unset/-march=unset feature introduced in
> r15-3606-g7d6c6a0d15c.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/bfloat16_scalar_1_2.c: Added option
>   "-march=unset".
>   * gcc.target/arm/bfloat16_scalar_2_1.c: Likewise.
>   * gcc.target/arm/bfloat16_scalar_2_2.c: Likewise.
>   * gcc.target/arm/bfloat16_scalar_3_1.c: Likewise.
>   * gcc.target/arm/bfloat16_scalar_3_2.c: Likewise.
> 
> Signed-off-by: Torbjörn SVENSSON 
> ---
>  gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c | 2 +-
>  gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_1.c | 2 +-
>  gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c | 2 +-
>  gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_1.c | 2 +-
>  gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c | 2 +-
>  5 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c 
> b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c
> index 8293cafcc14..0d4c3ffec53 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_1_2.c
> @@ -1,7 +1,7 @@
>  /* { dg-do assemble { target { arm*-*-* } } } */
>  /* { dg-require-effective-target arm_v8_neon_ok } */
>  /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
> -/* { dg-additional-options "-march=armv8.2-a+bf16 -mfloat-abi=softfp 
> -mfpu=auto" } */
> +/* { dg-additional-options "-mcpu=unset -march=armv8.2-a+bf16 
> -mfloat-abi=softfp -mfpu=auto" } */
>  /* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
>  /* { dg-final { check-function-bodies "**" "" } } */

For all of these tests I'd just add v8_2a_bf16 to the table of architectures, 
then use that.  We then don't need to mess about with multiple dg-r-e-t rules.

R.

>  
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_1.c 
> b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_1.c
> index e84f837e162..43c6ce0c1d3 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_1.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_1.c
> @@ -1,7 +1,7 @@
>  /* { dg-do assemble { target { arm*-*-* } } } */
>  /* { dg-require-effective-target arm_v8_neon_ok } */
>  /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
> -/* { dg-additional-options "-march=armv8.2-a -mfloat-abi=hard 
> -mfpu=neon-fp-armv8" } */
> +/* { dg-additional-options "-mcpu=unset -march=armv8.2-a -mfloat-abi=hard 
> -mfpu=neon-fp-armv8" } */
>  /* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
>  /* { dg-final { check-function-bodies "**" "" } } */
>  
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c 
> b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c
> index 93ec059819a..64b584ea34c 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_2_2.c
> @@ -1,7 +1,7 @@
>  /* { dg-do assemble { target { arm*-*-* } } } */
>  /* { dg-require-effective-target arm_v8_neon_ok } */
>  /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
> -/* { dg-additional-options "-march=armv8.2-a -mfloat-abi=softfp 
> -mfpu=neon-fp-armv8" } */
> +/* { dg-additional-options "-mcpu=unset -march=armv8.2-a -mfloat-abi=softfp 
> -mfpu=neon-fp-armv8" } */
>  /* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
>  /* { dg-final { check-function-bodies "**" "" } } */
>  
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_1.c 
> b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_1.c
> index a1a70690322..eb9baba9cd8 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_1.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_1.c
> @@ -1,7 +1,7 @@
>  /* { dg-do assemble { target { arm*-*-* } } } */
>  /* { dg-require-effective-target arm_v8_neon_ok } */
>  /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
> -/* { dg-additional-options "-march=armv8.2-a -mfloat-abi=hard 
> -mfpu=neon-fp-armv8" } */
> +/* { dg-additional-options "-mcpu=unset -march=armv8.2-a -mfloat-abi=hard 
> -mfpu=neon-fp-armv8" } */
>  /* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
>  /* { dg-final { check-function-bodies "**" "" } } */
>  
> diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c 
> b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c
> index f49072613f0..74a74ec54fe 100644
> --- a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c
> +++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_3_2.c
> @@ -1,7 +1,7 @@
>  /* { dg-do assemble { target { arm*-*-* } } } */
>  /* { dg-require-effective-target arm_v8_neon_ok } */
>  /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
> -/* { dg-additional-options "-march=armv8.2-a -mfloat-abi=softfp 
> -mfpu=neon-fp-armv8" } */
> +/* { dg-additional-options "-mcpu=unset -march=armv8.2-a -mfloat-abi=softfp 
> -mfpu=neon-fp-armv8" } */
>  /* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
>  /* { dg-final

RE: [RFC] PR81358: Enable automatic linking of libatomic

2024-11-19 Thread Prathamesh Kulkarni


> -Original Message-
> From: Xi Ruoyao 
> Sent: 16 November 2024 09:23
> To: Prathamesh Kulkarni ; josmy...@redhat.com;
> Matthew Malcomson ; gcc-patches@gcc.gnu.org
> Subject: Re: [RFC] PR81358: Enable automatic linking of libatomic
> 
> External email: Use caution opening links or attachments
> 
> 
> On Sat, 2024-11-16 at 03:44 +, Prathamesh Kulkarni wrote:
> > diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in index
> > 9798e7c09e9..62cd5e0a76b 100644
> > --- a/libatomic/Makefile.in
> > +++ b/libatomic/Makefile.in
> > @@ -1,7 +1,7 @@
> > -# Makefile.in generated by automake 1.15.1 from Makefile.am.
> > +# Makefile.in generated by automake 1.16.1 from Makefile.am.
> 
> You cannot use a random automake version.  Please use the version
> specified in https://gcc.gnu.org/install/prerequisites.html.
Hi Xi,
Ah indeed, thanks for pointing out.
The attached patch uses automake-1.15.1 and autoconf-2.69 to autogenerate 
Makefile.in and configure files.
Bootstrapped+tested on aarch64-linux-gnu.
Does the patch look like it is heading in the right direction?

Thanks,
Prathamesh
> 
> --
> Xi Ruoyao 
> School of Aerospace Science and Technology, Xidian University
PR81358: Enable automatic linking of libatomic.

ChangeLog:
PR driver/81358
* Makefile.def: Add dependencies so libatomic is built before target
libraries are configured.
* configure.ac: Add libatomic to bootstrap_target_libs.
* Makefile.in: Regenerate.
* configure: Regenerate.

gcc/ChangeLog:
PR driver/81358
* common.opt: New option -flink-libatomic.
* gcc.cc (LINK_LIBATOMIC_SPEC): New macro.
* config/gnu-user.h (GNU_USER_TARGET_LINK_GCC_C_SEQUENCE_SPEC): Use
LINK_LIBATOMIC_SPEC.

libatomic/ChangeLog:
PR driver/81358
* Makefile.am: Pass -fno-link-libatomic.
New rule all.
* configure.ac: Pass -fno-link-libatomic. 
* Makefile.in: Regenerate.
* configure: Regenerate.

Signed-off-by: Prathamesh Kulkarni 
Co-authored-by: Matthew Malcolmson 

diff --git a/Makefile.def b/Makefile.def
index 19954e7d731..90899fa28cf 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -656,6 +656,26 @@ lang_env_dependencies = { module=libgcc; no_gcc=true; 
no_c=true; };
 // a dependency on libgcc for native targets to configure.
 lang_env_dependencies = { module=libiberty; no_c=true; };
 
+dependencies = { module=configure-target-libbacktrace; 
on=all-target-libatomic; };
+dependencies = { module=configure-target-libgloss; on=all-target-libatomic; };
+dependencies = { module=configure-target-newlib; on=all-target-libatomic; };
+dependencies = { module=configure-target-libgomp; on=all-target-libatomic; };
+dependencies = { module=configure-target-libitm; on=all-target-libatomic; };
+dependencies = { module=configure-target-libstdc++v3; on=all-target-libatomic; 
};
+dependencies = { module=configure-target-libsanitizer; 
on=all-target-libatomic; };
+dependencies = { module=configure-target-libvtv; on=all-target-libatomic; };
+dependencies = { module=configure-target-libssp; on=all-target-libatomic; };
+dependencies = { module=configure-target-libquadmath; on=all-target-libatomic; 
};
+dependencies = { module=configure-target-libgfortran; on=all-target-libatomic; 
};
+dependencies = { module=configure-target-libffi; on=all-target-libatomic; };
+dependencies = { module=configure-target-libobjc; on=all-target-libatomic; };
+dependencies = { module=configure-target-libada; on=all-target-libatomic; };
+dependencies = { module=configure-target-libgm2; on=all-target-libatomic; };
+dependencies = { module=configure-target-libgo; on=all-target-libatomic; };
+dependencies = { module=configure-target-libgrust; on=all-target-libatomic; };
+dependencies = { module=configure-target-libphobos; on=all-target-libatomic; };
+dependencies = { module=configure-target-zlib; on=all-target-libatomic; };
+
 dependencies = { module=configure-target-fastjar; on=configure-target-zlib; };
 dependencies = { module=all-target-fastjar; on=all-target-zlib; };
 dependencies = { module=configure-target-libgo; on=configure-target-libffi; };
diff --git a/Makefile.in b/Makefile.in
index 966d6045496..5295929bfa9 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -68551,6 +68551,66 @@ all-flex: maybe-all-build-bison
 all-flex: maybe-all-m4
 all-flex: maybe-all-build-texinfo
 all-m4: maybe-all-build-texinfo
+configure-target-libbacktrace: maybe-all-target-libatomic
+configure-stage1-target-libbacktrace: maybe-all-stage1-target-libatomic
+configure-stage2-target-libbacktrace: maybe-all-stage2-target-libatomic
+configure-stage3-target-libbacktrace: maybe-all-stage3-target-libatomic
+configure-stage4-target-libbacktrace: maybe-all-stage4-target-libatomic
+configure-stageprofile-target-libbacktrace: 
maybe-all-stageprofile-target-libatomic
+configure-stagetrain-target-libbacktrace: maybe-all-stagetrain-target-libatomic
+configure-stagefeedback-target-libbacktrace: 
maybe-all-stagefeedback-target-libat

Re: [PATCH] aarch64: Mark __builtin_aarch64_im_lane_boundsi as leaf and nothrow [PR117665]

2024-11-19 Thread Andrew Pinski
On Tue, Nov 19, 2024 at 9:21 AM Andrew Pinski  wrote:
>
> __builtin_aarch64_im_lane_boundsi is known not to throw or call back into 
> another
> function since it will either be folded into a NOP or will produce a compiler 
> error.
>
> This fixes the ICE by fixing the missed optimization. It does not fix the 
> underlying
> issue with fold_marked_statements; which I filed as PR 117668.

I forgot to mention that I will be auditing the rest of the aarch64
builtins and add leaf/nothrow as needed. I recorded that as PR 117666.

Thanks,
Andrew

>
> Built and tested for aarch64-linux-gnu.
>
> PR target/117665
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-builtins.cc 
> (aarch64_init_simd_builtin_functions):
> Pass nothrow and leaf as attributes to aarch64_general_add_builtin for
> __builtin_aarch64_im_lane_boundsi.
>
> gcc/testsuite/ChangeLog:
>
> * g++.target/aarch64/lane-bound-1.C: New test.
> * gcc.target/aarch64/lane-bound-3.c: New test.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/config/aarch64/aarch64-builtins.cc|  6 -
>  .../g++.target/aarch64/lane-bound-1.C | 21 +++
>  .../gcc.target/aarch64/lane-bound-3.c | 27 +++
>  3 files changed, 53 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.target/aarch64/lane-bound-1.C
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-3.c
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> b/gcc/config/aarch64/aarch64-builtins.cc
> index b860e22f01f..e26ee323a2d 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -1482,10 +1482,14 @@ aarch64_init_simd_builtin_functions (bool 
> called_from_pragma)
>   size_type_node,
>   intSI_type_node,
>   NULL);
> +  /* aarch64_im_lane_boundsi should be leaf and nothrow as it
> +is expanded as nop or will cause an user error.  */
> +  tree attrs = aarch64_add_attribute ("nothrow", NULL_TREE);
> +  attrs = aarch64_add_attribute ("leaf", attrs);
>aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
> = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
>lane_check_fpr,
> -  AARCH64_SIMD_BUILTIN_LANE_CHECK);
> +  AARCH64_SIMD_BUILTIN_LANE_CHECK, 
> attrs);
>  }
>
>for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
> diff --git a/gcc/testsuite/g++.target/aarch64/lane-bound-1.C 
> b/gcc/testsuite/g++.target/aarch64/lane-bound-1.C
> new file mode 100644
> index 000..cb3e99816a1
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/aarch64/lane-bound-1.C
> @@ -0,0 +1,21 @@
> +// { dg-do compile }
> +// { dg-options "" }
> +#include 
> +
> +// vgetq_lane_u64 should not cause any
> +// exceptions to thrown so even at -O0
> +// removeme should have been removed.
> +void removeme()
> +__attribute__((error("nothrow")));
> +int _setjmp();
> +void hh(uint64x2_t c, int __b)
> +{
> +  try {
> +vgetq_lane_u64(c, __b);
> +// { dg-error "must be a constant immediate" "" { target *-*-* } 0 }
> +  } catch (...)
> +  {
> +removeme(); // { dg-bogus "declared with attribute error" }
> +  }
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-3.c 
> b/gcc/testsuite/gcc.target/aarch64/lane-bound-3.c
> new file mode 100644
> index 000..9e0dad372cb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-3.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +/* PR target/117665 */
> +/* __builtin_aarch64_im_lane_boundsi was causing an abnormal
> +   edge to the setjmp but then the builtin was folded into a nop
> +   and that edge was never removed but the edge was not needed in
> +   the first place. */
> +
> +#include 
> +
> +__attribute__((always_inline))
> +static inline
> +void h(uint64x2_t c, int __b) {
> +   /* Use vgetq_lane_u64 to get a
> + __builtin_aarch64_im_lane_boundsi */
> +   vgetq_lane_u64(c, __b);
> +
> +  __builtin_unreachable();
> +}
> +
> +int _setjmp();
> +void hh(uint64x2_t c) {
> +  int __b = 0;
> +  if (_setjmp())
> +h(c, 0);
> +}
> --
> 2.43.0
>


[PATCH] aarch64: Mark __builtin_aarch64_im_lane_boundsi as leaf and nothrow [PR117665]

2024-11-19 Thread Andrew Pinski
__builtin_aarch64_im_lane_boundsi is known not to throw or call back into 
another
function since it will either be folded into a NOP or will produce a compiler 
error.

This fixes the ICE by fixing the missed optimization. It does not fix the 
underlying
issue with fold_marked_statements; which I filed as PR 117668.

Built and tested for aarch64-linux-gnu.

PR target/117665

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc 
(aarch64_init_simd_builtin_functions):
Pass nothrow and leaf as attributes to aarch64_general_add_builtin for
__builtin_aarch64_im_lane_boundsi.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/lane-bound-1.C: New test.
* gcc.target/aarch64/lane-bound-3.c: New test.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64-builtins.cc|  6 -
 .../g++.target/aarch64/lane-bound-1.C | 21 +++
 .../gcc.target/aarch64/lane-bound-3.c | 27 +++
 3 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/lane-bound-1.C
 create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-3.c

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index b860e22f01f..e26ee323a2d 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1482,10 +1482,14 @@ aarch64_init_simd_builtin_functions (bool 
called_from_pragma)
  size_type_node,
  intSI_type_node,
  NULL);
+  /* aarch64_im_lane_boundsi should be leaf and nothrow as it
+is expanded as nop or will cause an user error.  */
+  tree attrs = aarch64_add_attribute ("nothrow", NULL_TREE);
+  attrs = aarch64_add_attribute ("leaf", attrs);
   aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
= aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
   lane_check_fpr,
-  AARCH64_SIMD_BUILTIN_LANE_CHECK);
+  AARCH64_SIMD_BUILTIN_LANE_CHECK, attrs);
 }
 
   for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
diff --git a/gcc/testsuite/g++.target/aarch64/lane-bound-1.C 
b/gcc/testsuite/g++.target/aarch64/lane-bound-1.C
new file mode 100644
index 000..cb3e99816a1
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/lane-bound-1.C
@@ -0,0 +1,21 @@
+// { dg-do compile }
+// { dg-options "" }
+#include 
+
+// vgetq_lane_u64 should not cause any
+// exceptions to thrown so even at -O0
+// removeme should have been removed.
+void removeme()
+__attribute__((error("nothrow")));
+int _setjmp();
+void hh(uint64x2_t c, int __b)
+{
+  try {
+vgetq_lane_u64(c, __b);
+// { dg-error "must be a constant immediate" "" { target *-*-* } 0 }
+  } catch (...)
+  {
+removeme(); // { dg-bogus "declared with attribute error" }
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-3.c 
b/gcc/testsuite/gcc.target/aarch64/lane-bound-3.c
new file mode 100644
index 000..9e0dad372cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-3.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* PR target/117665 */
+/* __builtin_aarch64_im_lane_boundsi was causing an abnormal
+   edge to the setjmp but then the builtin was folded into a nop
+   and that edge was never removed but the edge was not needed in
+   the first place. */
+
+#include 
+
+__attribute__((always_inline))
+static inline
+void h(uint64x2_t c, int __b) {
+   /* Use vgetq_lane_u64 to get a 
+ __builtin_aarch64_im_lane_boundsi */
+   vgetq_lane_u64(c, __b);
+
+  __builtin_unreachable();
+}
+
+int _setjmp();
+void hh(uint64x2_t c) {
+  int __b = 0;
+  if (_setjmp())
+h(c, 0);
+}
-- 
2.43.0



Re: [RFC PATCH 1/5] vect: Force alignment peeling to vectorize more early break loops

2024-11-19 Thread Alex Coplan
On 19/11/2024 17:02, Richard Sandiford wrote:
> Sorry for the slow review.  Finally catching up on backlog.
> 
> Richard Biener  writes:
> > On Mon, 28 Oct 2024, Alex Coplan wrote:
> >
> >> This allows us to vectorize more loops with early exits by forcing
> >> peeling for alignment to make sure that we're guaranteed to be able to
> >> safely read an entire vector iteration without crossing a page boundary.
> >> 
> >> To make this work for VLA architectures we have to allow compile-time
> >> non-constant target alignments.  We also have to override the result of
> >> the target's preferred_vector_alignment hook if it isn't a power-of-two
> >> multiple of the TYPE_SIZE of the chosen vector type.
> >> 
> >> There is currently an implicit assumption that the TYPE_SIZE of the
> >> vector type is itself a power of two.  For non-VLA types this
> >> could be checked directly in the vectorizer.  For VLA types I
> >> had discussed offline with Richard S about adding a target hook to allow
> >> the vectorizer to query the backend to confirm that a given VLA type
> >> is known to have a power-of-two size at runtime.
> >
> > GCC assumes all vectors have power-of-two size, so I don't think we
> > need to check anything but we'd instead have to make sure the
> > target constrains the hardware when this assumption doesn't hold
> > in silicon.
> 
> We did at one point support non-power-of-2 for VLA only.  But things
> might have crept in since that break it even for VLA.  It's no longer
> something that matters for SVE because the architecture has been
> tightened to remove the non-power-of-2 option.
> 
> My main comment on the patch is about:
> 
> +  /* Below we reject compile-time non-constant target alignments, but if
> + our misalignment is zero, then we are known to already be aligned
> + w.r.t. any such possible target alignment.  */
> +  if (known_eq (misalignment, 0))
> +return 0;
> 
> When is that true for VLA?  It seems surprising that we can guarantee
> alignment to an unknown boundary :)  However, I agree that it's the
> natural consequence of the formula.

My vague memory is that the alignment peeling machinery forces the
dr_info->misalignment to 0 after we've decided to peel for alignment
(for DRs which we know we will have made aligned by peeling).  So the
check is designed to handle that case.

Unfortunately it's only a vague memory at this point.  Does that answer
your question, or should I go and check this in more detail?

Thanks,
Alex

> 
> Thanks,
> Richard
> 
> 


Re: [PATCH V1] RISC-V: Add the mini support for SiFive extensions.

2024-11-19 Thread Jeff Law




On 11/17/24 2:55 AM, shiyul...@iscas.ac.cn wrote:

From: yulong 

This patch adds minimal support for the xsfvqmaccqoq, xsfvqmaccdod and
  xsfvfnrclipxfqf extensions.

gcc/ChangeLog:

 * common/config/riscv/riscv-common.cc: New.
 * config/riscv/riscv.opt: New.

gcc/testsuite/ChangeLog:

 * gcc.target/riscv/predef-sf-3.c: New test.
 * gcc.target/riscv/predef-sf-4.c: New test.
 * gcc.target/riscv/predef-sf-5.c: New test.

Explicitly deferring to Kito on this one.

Jeff



Re: [PATCH] RISC-V:Support N32(32-bit ABI on 64-bit ISA) in riscv

2024-11-19 Thread Jeff Law




On 11/15/24 8:21 PM, Liao Shihua wrote:

RISC-V N32 ABI means using 32-bit ABI on 64-bit ISA, the discussion in
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/381 .
At this moment, N32 is supported by the bare-metal toolchain.
Three open-source RTOSes use this feature, and their support has been merged upstream.
You can see them in
EasyXem (AUTOSAR CP R19-11): 
https://atomgit.com/easyxmen/XMen/tree/rv64ilp32-dev
Nuttx: https://github.com/apache/nuttx
RT-Thread:https://github.com/RT-Thread/rt-thread/pull/9194

This patch supports N32 (32-bit ABI on 64-bit ISA) in riscv.
It removes the option check when -march=rv64* -mabi=ilp32, and replaces XLEN_SPEC in
LINK_SPEC with ABI_LEN_SPEC. In addition, it changes some machine descriptions.

gcc/ChangeLog:

 * config.gcc:Allow rv64* ISA use ilp32 ABI
 * config/riscv/bitmanip.md: change some machine descriptions.
 * config/riscv/elf.h (LINK_SPEC): Enable elf32 on rv64 ISA
 * config/riscv/riscv.cc (riscv_option_override): remove check ilp32 on 
rv64.
 * config/riscv/riscv.h (TARGET_ILP32): Add TARGET_ILP32 like 
TARGET_64BIT.
 (POINTER_SIZE): Change POINTER_SIZE belong to ABI.
 (Pmode): Likewise.
 (ABI_LEN_SPEC): Likewise
 * config/riscv/riscv.md: Change some machine descriptions.

---
  gcc/config.gcc   |  3 +++
  gcc/config/riscv/bitmanip.md |  4 ++--
  gcc/config/riscv/elf.h   |  2 +-
  gcc/config/riscv/riscv.cc|  3 ++-
  gcc/config/riscv/riscv.h | 10 +-
  gcc/config/riscv/riscv.md| 20 
  6 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 9b616bd6e1f..a1b737117a0 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4828,6 +4828,9 @@ case "${target}" in
ilp32,rv32* | ilp32e,rv32e* \
| ilp32f,rv32*f* | ilp32f,rv32g* \
| ilp32d,rv32*d* | ilp32d,rv32g* \
+   | ilp32,rv64* | ilp32e,rv64e* \
+   | ilp32f,rv64*f* | ilp32f,rv64g* \
+   | ilp32d,rv64*d* | ilp32d,rv64g* \
| lp64,rv64* | lp64e,rv64e* \
| lp64f,rv64*f* | lp64f,rv64g* \
| lp64d,rv64*d* | lp64d,rv64g*)
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 06ff698bfe7..4002f05acb8 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -351,7 +351,7 @@
  {
if (TARGET_XTHEADBB && !immediate_operand (operands[2], VOIDmode))
  FAIL;
-  if (TARGET_64BIT && register_operand (operands[2], QImode))
+  if (TARGET_64BIT && !TARGET_ILP32 && register_operand (operands[2], QImode))
  {
rtx t = gen_reg_rtx (DImode);
emit_insn (gen_rotrsi3_sext (t, operands[1], operands[2]));
@@ -393,7 +393,7 @@
(match_operand:QI 2 "register_operand" "r")))]
"TARGET_ZBB || TARGET_ZBKB"
  {
-  if (TARGET_64BIT)
+  if (TARGET_64BIT && !TARGET_ILP32)
  {
rtx t = gen_reg_rtx (DImode);
emit_insn (gen_rotlsi3_sext (t, operands[1], operands[2]));
So circling back from the patchwork meeting, I'd like to understand why 
these changes were made (and similar ones in riscv.md).


I don't immediately see how they'd be a correctness issue for ILP32, but 
I could perhaps see how they might interact with pointer masking.  I 
could also see them as a performance question/concern in the ILP32 space.


So the ask is for you to provide more details about why these changes 
were made -- what specific problem is being solved?




diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7694954c4c5..0390bece98b 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10471,7 +10471,8 @@ riscv_option_override (void)
  error ("z*inx requires ABI ilp32, ilp32e, lp64 or lp64e");
  
/* We do not yet support ILP32 on RV64.  */

-  if (BITS_PER_WORD != POINTER_SIZE)
+  if (BITS_PER_WORD != POINTER_SIZE
+  && !(BITS_PER_WORD == 64 && POINTER_SIZE == 32))
  error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
Note that I think we'll need to adjust the comment since once this work 
is completed & integrated we'll be supporting ILP32 on RV64 :-)





@@ -3818,6 +3818,10 @@
"reload_completed"
[(const_int 0)]
  {
+  if (GET_MODE (operands[0]) != Pmode)
+operands[0] = convert_to_mode (Pmode, operands[0], 0);
+  if (GET_MODE (operands[1]) != Pmode)
+operands[1] = convert_to_mode (Pmode, operands[1], 0);
riscv_set_return_address (operands[0], operands[1]);
DONE;
  })
I'd think we could change the mode of operands 0 and 1 in the pattern. 
That seems like a better way to fix this problem.  Is there a reason why 
that wouldn't work?  I'd think we could use :P for both operands to 
ensure a Pmode operand.




Jeff



Re: [PATCH v2] testsuite: arm: Only check for literal pools in no-literal-pool-m0.c

2024-11-19 Thread Richard Earnshaw (lists)
On 18/11/2024 11:13, Torbjörn SVENSSON wrote:
> Changes since v1:
> 
> - Replaced fragile checks on constants with check for literal pool
>   using "ldr r[0-9]+, \.L[0-9]+".
> 
> Ok for trunk?
> 
> --
> 
> With the changes in r15-1579-g792f97b44ff, the constants have been
> updated.
> This patch drops the fragile check on the constants and instead only
> checks that there is no literal pool generated.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/pure-code/no-literal-pool-m0.c: Only check for
>   literal pools.
> 
> Signed-off-by: Torbjörn SVENSSON 

Pedantically, we're checking for the absence of a literal pool.

Otherwise, OK.

R.

> ---
>  .../arm/pure-code/no-literal-pool-m0.c| 110 ++
>  1 file changed, 9 insertions(+), 101 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c 
> b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
> index bd6f4af183b..4f9265eca85 100644
> --- a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
> +++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
> @@ -1,176 +1,84 @@
>  /* { dg-do compile } */
>  /* { dg-skip-if "skip override" { *-*-* } { "-mfloat-abi=hard" } { "" } } */
>  /* { dg-options "-mpure-code -mcpu=cortex-m0 -march=armv6s-m -mthumb 
> -mfloat-abi=soft" } */
> -/* { dg-final { check-function-bodies "**" "" } } */
>  
> -/* Does not use thumb1_gen_const_int.
> -** test_0:
> -**   ...
> -**   movsr[0-3], #0
> -**   ...
> -*/
> +/* Does not use thumb1_gen_const_int.  */
>  int
>  test_0 ()
>  {
>return 0;
>  }
>  
> -/* Does not use thumb1_gen_const_int.
> -** test_128:
> -**   ...
> -**   movsr[0-3], #128
> -**   ...
> -*/
> +/* Does not use thumb1_gen_const_int.  */
>  int
>  test_128 ()
>  {
>return 128;
>  }
>  
> -/* Does not use thumb1_gen_const_int.
> -** test_264:
> -**   ...
> -**   movsr[0-3], #132
> -**   lslsr[0-3], r[0-3], #1
> -**   ...
> -*/
> +/* Does not use thumb1_gen_const_int.  */
>  int
>  test_264 ()
>  {
>return 264;
>  }
>  
> -/* Does not use thumb1_gen_const_int.
> -** test_510:
> -**   ...
> -**   movsr[0-3], #255
> -**   lslsr[0-3], r[0-3], #1
> -**   ...
> -*/
> +/* Does not use thumb1_gen_const_int.  */
>  int
>  test_510 ()
>  {
>return 510;
>  }
>  
> -/* Does not use thumb1_gen_const_int.
> -** test_512:
> -**   ...
> -**   movsr[0-3], #128
> -**   lslsr[0-3], r[0-3], #2
> -**   ...
> -*/
> +/* Does not use thumb1_gen_const_int.  */
>  int
>  test_512 ()
>  {
>return 512;
>  }
>  
> -/* Does not use thumb1_gen_const_int.
> -** test_764:
> -**   ...
> -**   movsr[0-3], #191
> -**   lslsr[0-3], r[0-3], #2
> -**   ...
> -*/
> +/* Does not use thumb1_gen_const_int.  */
>  int
>  test_764 ()
>  {
>return 764;
>  }
>  
> -/* Does not use thumb1_gen_const_int.
> -** test_65536:
> -**   ...
> -**   movsr[0-3], #128
> -**   lslsr[0-3], r[0-3], #9
> -**   ...
> -*/
> +/* Does not use thumb1_gen_const_int.  */
>  int
>  test_65536 ()
>  {
>return 65536;
>  }
>  
> -/*
> -** test_0x123456:
> -**   ...
> -**   movsr[0-3], #18
> -**   lslsr[0-3], r[0-3], #8
> -**   addsr[0-3], r[0-3], #52
> -**   lslsr[0-3], r[0-3], #8
> -**   addsr[0-3], r[0-3], #86
> -**   ...
> -*/
>  int
>  test_0x123456 ()
>  {
>return 0x123456;
>  }
>  
> -/*
> -** test_0x1123456:
> -**   ...
> -**   movsr[0-3], #137
> -**   lslsr[0-3], r[0-3], #8
> -**   addsr[0-3], r[0-3], #26
> -**   lslsr[0-3], r[0-3], #8
> -**   addsr[0-3], r[0-3], #43
> -**   lslsr[0-3], r[0-3], #1
> -**   ...
> -*/
>  int
>  test_0x1123456 ()
>  {
>return 0x1123456;
>  }
>  
> -/* With -Os, we generate:
> -   movs r0, #16
> -   lsls r0, r0, r0
> -   With the other optimization levels, we generate:
> -   movs r0, #16
> -   lsls r0, r0, #16
> -   hence the two alternatives.  */
> -/*
> -** test_0x110:
> -**   ...
> -**   movsr[0-3], #16
> -**   lslsr[0-3], r[0-3], (#16|r[0-3])
> -**   addsr[0-3], r[0-3], #1
> -**   lslsr[0-3], r[0-3], #4
> -**   ...
> -*/
>  int
>  test_0x110 ()
>  {
>return 0x110;
>  }
>  
> -/*
> -** test_0x111:
> -**   ...
> -**   movsr[0-3], #1
> -**   lslsr[0-3], r[0-3], #24
> -**   addsr[0-3], r[0-3], #17
> -**   ...
> -*/
>  int
>  test_0x111 ()
>  {
>return 0x111;
>  }
>  
> -/*
> -** test_m8192:
> -**   ...
> -**   movsr[0-3], #1
> -**   lslsr[0-3], r[0-3], #13
> -**   rsbsr[0-3], r[0-3], #0
> -**   ...
> -*/
>  int
>  test_m8192 ()
>  {
>return -8192;
>  }
> +
> +/* { dg-final { scan-assembler-not "\tldr\tr\[0-9\]+, \\.L\[0-9\]+" } } */



Re: [PATCH v2 05/14] gimple: Handle tail padding when computing gimple_ops_offset

2024-11-19 Thread Lewis Hyatt
On Tue, Nov 19, 2024 at 9:55 AM Richard Biener
 wrote:
>
> On Sun, Nov 17, 2024 at 4:25 AM Lewis Hyatt  wrote:
> >
> > The array gimple_ops_offset_[], which is used to find the trailing op[]
> > array for a given gimple struct, is computed assuming that op[] will be
> > found at sizeof(tree) bytes away from the end of the struct. This is only
> > correct if the alignment requirement of a pointer is the same as the
> > alignment requirement of the struct, otherwise there will be padding bytes
> > that invalidate the calculation. On 64-bit platforms, this generally works
> > fine because a pointer has 8-byte alignment and none of the structs make use
> > of more than that. On 32-bit platforms, it also currently works fine because
> > there are no 64-bit integers in the gimple structs. There are 32-bit
> > platforms (e.g. sparc) on which a pointer has 4-byte alignment and a
> > uint64_t has 8-byte alignment. On such platforms, adding a uint64_t to the
> > gimple structs (as will take place when location_t is changed to be 64-bit)
> > causes gimple_ops_offset_ to be 4 bytes too large.
> >
> > It would be nice to use offsetof() to compute the offset exactly, but
> > offsetof() is not guaranteed to work for these types, because they use
> > inheritance and so are not standard layout types. This patch attempts to
> > detect the presence of tail padding by detecting when such padding is reused
> > by inheritance; the padding should generally be reused for the same reason
> > that offsetof() is not available, namely that all the relevant types use
> > inheritance. One could envision systems on which this fix does not go far
> > enough (e.g., if the ABI forbids reuse of tail padding), but it makes things
> > better without affecting anything that currently works.
> >
> > gcc/ChangeLog:
> >
> > * gimple.cc (get_tail_padding_adjustment): New function.
> > (DEFGSSTRUCT): Adjust the computation of gimple_ops_offset_ to be
> > correct in the presence of tail padding.
> > ---
> >  gcc/gimple.cc | 34 +-
> >  1 file changed, 29 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/gimple.cc b/gcc/gimple.cc
> > index f7b313be40e..f0a642f5b51 100644
> > --- a/gcc/gimple.cc
> > +++ b/gcc/gimple.cc
> > @@ -52,12 +52,36 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "ipa-modref.h"
> >  #include "dbgcnt.h"
> >
> > -/* All the tuples have their operand vector (if present) at the very bottom
> > -   of the structure.  Therefore, the offset required to find the
> > -   operands vector the size of the structure minus the size of the 1
> > -   element tree array at the end (see gimple_ops).  */
> > +/* All the tuples have their operand vector (if present) at the very 
> > bottom of
> > +   the structure.  Therefore, the offset required to find the operands 
> > vector is
> > +   the size of the structure minus the size of the 1-element tree array at 
> > the
> > +   end (see gimple_ops).  An adjustment may be required if there is tail
> > +   padding, as may happen on a host (e.g. sparc) where a pointer has 4-byte
> > +   alignment while a uint64_t has 8-byte alignment.
> > +
> > +   Unfortunately, we can't use offsetof to do this computation 100%
> > +   straightforwardly, because these structs use inheritance and so are not
> > +   standard layout types.  However, the fact that they are not standard 
> > layout
> > +   types also means that tail padding will be reused in inheritance, which 
> > makes
> > +   it possible to check for the problematic case with the following logic
> > +   instead.  If tail padding is detected, the offset should be decreased
> > +   accordingly.  */
> > +
> > +template
> > +static constexpr size_t
> > +get_tail_padding_adjustment ()
> > +{
> > +  struct padding_check : G
> > +  {
> > +tree t;
> > +  };
> > +  return sizeof (padding_check) == sizeof (G) ? sizeof (tree) : 0;
> > +}
> > +
> >  #define DEFGSSTRUCT(SYM, STRUCT, HAS_TREE_OP) \
> > -   (HAS_TREE_OP ? sizeof (struct STRUCT) - sizeof (tree) : 0),
> > +  (HAS_TREE_OP \
> > +   ? sizeof (STRUCT) - sizeof (tree) - get_tail_padding_adjustment 
> > () \
> > +   : 0),
>
> I wonder if we cannot simply use offsetof (STRUCT, ops) and some
> "trick" to avoid
> parsing/sanitizing this when HAS_TREE_OP is false?  Maybe even a
>
> template 
> constexpr size_t ops_offset () { return 0; }
>
> template 
> constexpr size_t ops_offset ()
> {
>   return offsetof (T, ops);
> /* or T x; return (char *)x.ops - (char *)x; */
> }
>
> ?  That is I don't like the "indirect" adjustment via computing the padding.
>
> Richard.

Thanks, yes, I tried to start this way as well. The template works, or
it's not hard to change gsstruct.def so that DEFGSSTRUCT(SYM, STRUCT,
HAS_TREE_OP) is instead two different macros, one for
HAS_TREE_OP==true and one for false. The issue I ran into is that:

-AFAIK it's not OK to call offsetof() on a struct which is not
standard layout. All the gimple types fail

Re: [PATCH] libgccjit: Add support for machine-dependent builtins

2024-11-19 Thread David Malcolm
On Thu, 2024-11-14 at 15:27 -0500, Antoni Boucher wrote:
> It seems we don't need to do the cleanup in i386-builtins.cc anymore,
> so 
> I removed it.
> David: Is it possible that your recent fixes for the GC within
> libgccjit 
> also fixed the issue here?
> 
> Here's the updated patch and answers below.
> 
> (GitHub link if you find it easier for review: 
> https://github.com/antoyo/libgccjit/pull/5)
> 
> Thanks.

Thanks; I looked over the patch via the above link and it looks good to
me for trunk.

Dave

> 
> Le 2024-06-26 à 18 h 49, David Malcolm a écrit :
> > On Thu, 2023-11-23 at 17:17 -0500, Antoni Boucher wrote:
> > > Hi.
> > > I did split the patch and sent one for the bfloat16 support and
> > > another
> > > one for the vector support.
> > > 
> > > Here's the updated patch for the machine-dependent builtins.
> > > 
> > 
> > Thanks for the patch; sorry about the long delay in reviewing it.
> > 
> > CCing Jan and Uros re the i386 part of that patch; for reference
> > the
> > patch being discussed is here:
> >   
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638027.html
> > 
> > >  From e025f95f4790ae861e709caf23cbc0723c1a3804 Mon Sep 17
> > > 00:00:00 2001
> > > From: Antoni Boucher 
> > > Date: Mon, 23 Jan 2023 17:21:15 -0500
> > > Subject: [PATCH] libgccjit: Add support for machine-dependent
> > > builtins
> > 
> > [...snip...]
> > 
> > > diff --git a/gcc/config/i386/i386-builtins.cc
> > > b/gcc/config/i386/i386-builtins.cc
> > > index 42fc3751676..5cc1d6f4d2e 100644
> > > --- a/gcc/config/i386/i386-builtins.cc
> > > +++ b/gcc/config/i386/i386-builtins.cc
> > > @@ -225,6 +225,22 @@ static GTY(()) tree ix86_builtins[(int)
> > > IX86_BUILTIN_MAX];
> > >   
> > >   struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
> > >   
> > > +static void
> > > +clear_builtin_types (void)
> > > +{
> > > +  for (int i = 0 ; i < IX86_BT_LAST_CPTR + 1 ; i++)
> > > +    ix86_builtin_type_tab[i] = NULL;
> > > +
> > > +  for (int i = 0 ; i < IX86_BUILTIN_MAX ; i++)
> > > +  {
> > > +    ix86_builtins[i] = NULL;
> > > +    ix86_builtins_isa[i].set_and_not_built_p = true;
> > > +  }
> > > +
> > > +  for (int i = 0 ; i < IX86_BT_LAST_ALIAS + 1 ; i++)
> > > +    ix86_builtin_func_type_tab[i] = NULL;
> > > +}
> > > +
> > >   tree get_ix86_builtin (enum ix86_builtins c)
> > >   {
> > >     return ix86_builtins[c];
> > > @@ -1483,6 +1499,8 @@ ix86_init_builtins (void)
> > >   {
> > >     tree ftype, decl;
> > >   
> > > +  clear_builtin_types ();
> > > +
> > >     ix86_init_builtin_types ();
> > >   
> > >     /* Builtins to get CPU type and features. */
> > 
> > Please can one of the i386 maintainers check this?
> > (CCing Jan and Uros: this is for the case where the compiler code
> > runs
> > multiple times in-process due to being linked into libgccjit.so. 
> > We
> > want to restore state within i386-builtins.cc to an initial state,
> > and
> > ensure that no GC-managed objects persist from previous in-memory
> > compiles).
> > 
> > > diff --git a/gcc/jit/docs/topics/compatibility.rst
> > b/gcc/jit/docs/topics/compatibility.rst
> > > index ebede440ee4..764de23341e 100644
> > > --- a/gcc/jit/docs/topics/compatibility.rst
> > > +++ b/gcc/jit/docs/topics/compatibility.rst
> > > @@ -378,3 +378,12 @@ alignment of a variable:
> > >   
> > >   ``LIBGCCJIT_ABI_25`` covers the addition of
> > >   :func:`gcc_jit_type_get_restrict`
> > > +
> > > +.. _LIBGCCJIT_ABI_26:
> > > +
> > > +``LIBGCCJIT_ABI_26``
> > > +
> > > +
> > > +``LIBGCCJIT_ABI_26`` covers the addition of a function to get
> > > target builtins:
> > > +
> > > +  * :func:`gcc_jit_context_get_target_builtin_function`
> > > diff --git a/gcc/jit/docs/topics/functions.rst
> > > b/gcc/jit/docs/topics/functions.rst
> > > index cf5cb716daf..e9b77fdb892 100644
> > > --- a/gcc/jit/docs/topics/functions.rst
> > > +++ b/gcc/jit/docs/topics/functions.rst
> > > @@ -140,6 +140,25 @@ Functions
> > >     uses such a parameter will lead to an error being emitted
> > > within
> > >     the context.
> > >   
> > > +.. function::  gcc_jit_function *\
> > > +   gcc_jit_context_get_target_builtin_function
> > > (gcc_jit_context *ctxt,\
> > > +   
> > > const char *name)
> > > +
> > > +   Get the :type:`gcc_jit_function` for the built-in function
> > > with the
> > > +   given name.  For example:
> > 
> > Might be nice to add the "(sometimes called intrinsic functions)"
> > text
> > you have in the header here.
> 
> Done
> 
> > 
> > [...snip]
> > 
> > > diff --git a/gcc/jit/dummy-frontend.cc b/gcc/jit/dummy-
> > > frontend.cc
> > > index a729086bafb..3ca9702d429 100644
> > > --- a/gcc/jit/dummy-frontend.cc
> > > +++ b/gcc/jit/dummy-frontend.cc
> > 
> > [...]
> > 
> > > @@ -29,8 +30,14 @@ along with GCC; see the file COPYING3.  If not
> > > see
> > >   #include "options.h"
> > >   #include "stringpool.h"
> > >   #include "attribs.h"
> > > +#include "j

Re: [PATCH] c: Fix up __builtin_stdc_rotate_{left,right} lowering [PR117456]

2024-11-19 Thread Joseph Myers
On Tue, 19 Nov 2024, Jakub Jelinek wrote:

> Hi!
> 
> Apparently the middle-end/expansion can only handle {L,R}ROTATE_EXPR
> on types with mode precision, or large/huge BITINT_TYPE.
> So, the following patch uses the rotate exprs only in those cases
> where it can be handled, and emits code with shifts/ior otherwise.
> As types without mode precision including small/medium BITINT_TYPE
> have unlikely power of two precision and TRUNC_MOD_EXPR is on many targets
> quite expensive, I chose to expand e.g. __builtin_stdc_rotate_left (arg1,
> arg2) as
> ((tem = arg1, count = arg2 % prec)
>  ? ((tem << count) | (tem >> (prec - count))) : tem)
> rather than
> (((tem = arg1) << (count = arg2 % prec))
>  | (tem >> (-count % prec))
> (where the assignments are really save_exprs, so no UB), because
> I think another TRUNC_MOD_EXPR would be more costly in most cases
> when the shift count is non-constant (and when it is constant,
> it folds to 2 shifts by constant and ior in either case).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK (though if in future any other language wants rotates of non-mode 
precision, we should teach the middle-end to lower / expand them 
appropriately itself rather than duplicating such logic between front 
ends).

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [PATCH 02/15] testsuite: Expand coverage for `__builtin_memcpy'

2024-11-19 Thread Jeff Law




On 11/17/24 7:59 PM, Maciej W. Rozycki wrote:

Expand coverage for `__builtin_memcpy', primarily for "cpymemM" block
copy pattern, although with smaller sizes open-coded sequences may be
produced instead.

This verifies block sizes in bytes from 1 to 64, across byte alignments
of 1, 2, 4, 8 and byte misalignments within from 0 up to 7 (there's some
redundancy there for the sake of simplicity of the test cases) both for
the source and the destination, making sure all data is copied and no
data is changed outside the area meant to be written.

The choice of the ranges for the parameters has come from the Alpha
backend, whose "cpymemM" pattern covers copies being made of up to 64
bytes and has various corner cases related to base alignment and the
misalignment within.

The test cases have proved invaluable in verifying changes to the Alpha
backend, but functionality covered is generic, so I have concluded these
tests qualify for generic verification and do not have to be limited to
the Alpha-specific subset of the testsuite.

On the implementation side the tests turned out to be quite stressful to
GCC and the original simpler version that just expanded all code inline
took a lot of time to complete compilation.  Depending on the target and
compilation options elapsed times up to 40 minutes (!) have been seen,
especially with GCC built at `-O0' for debugging purposes.

At the cost of increased complexity where a pair of macros is required
per variant rather than just one I have split the code into individual
functions forced not to be inlined and it improved compilation times
considerably without losing coverage.

Example compilation times with reasonably fast POWER9@2.166GHz at `-O2'
optimization and GCC built at `-O2' for various targets:

mips-linux-gnu:        23s
vax-netbsdelf:         29s
alphaev56-linux-gnu:   39s
alpha-linux-gnu:       43s
powerpc64le-linux-gnu: 48s

With GCC built at `-O0':

alphaev56-linux-gnu: 3m37s
alpha-linux-gnu: 3m54s

I have therefore set the timeout factor accordingly so as to take slower
test hosts into account.

gcc/testsuite/
* gcc.c-torture/execute/memcpy-a1.c: New file.
* gcc.c-torture/execute/memcpy-a2.c: New file.
* gcc.c-torture/execute/memcpy-a4.c: New file.
* gcc.c-torture/execute/memcpy-a8.c: New file.
* gcc.c-torture/execute/memcpy-ax.h: New file.
OK.  There's some chance for timing fallouts on things like qemu 
emulated targets, but I wouldn't let that get in the way of adding 
coverage. The total memory sizes don't look terrible, so I'm not too 
concerned about how the small embedded targets would respond from a 
testing standpoint.


jeff



Re: [PATCH 1/4] RISC-V: Add Zicfiss ISA extension.

2024-11-19 Thread Jeff Law




On 11/15/24 3:53 AM, Monk Chiang wrote:

This patch is implemented according to the RISC-V CFI specification.
It supports the generation of shadow stack instructions in the prologue,
epilogue, non-local gotos, and unwinding.

RISC-V CFI SPEC: https://github.com/riscv/riscv-cfi

gcc/ChangeLog:
* common/config/riscv/riscv-common.cc: Add ZICFISS ISA string.
* gcc/config/riscv/predicates.md: New predicate x1x5_operand.
* gcc/config/riscv/riscv.cc
  (riscv_expand_prologue): Insert shadow stack instructions.
  (riscv_expand_epilogue): Likewise.
  (riscv_for_each_saved_reg): Assign t0 or ra register for
  sspopchk instruction.
  (need_shadow_stack_push_pop_p): New function. Omit shadow
  stack operation on leaf function.
* gcc/config/riscv/riscv.h
  (need_shadow_stack_push_pop_p): Define.
* gcc/config/riscv/riscv.md: Add shadow stack patterns.
  (save_stack_nonlocal): Add shadow stack instructions for setjmp.
  (restore_stack_nonlocal): Add shadow stack instructions for longjmp.

libgcc/ChangeLog:
* gcc/config/riscv/riscv.opt (TARGET_ZICFISS): Define.
* libgcc/config/riscv/linux-unwind.h: Include shadow-stack-unwind.h.
* libgcc/config/riscv/shadow-stack-unwind.h
  (_Unwind_Frames_Extra): Define.
  (_Unwind_Frames_Increment): Define.

gcc/testsuite/ChangeLog:
* gcc/testsuite/gcc.target/riscv/ssp-1.c: New test.
* gcc/testsuite/gcc.target/riscv/ssp-2.c: New test.
Just a note.  Kito has a far better understanding of the needs of this 
stuff than I do.  So I'm explicitly deferring review of this series to him.


jeff



Re: [patch,avr] PR54378 Reconsider the default shift costs.

2024-11-19 Thread Denis Chertykov
вт, 19 нояб. 2024 г. в 21:22, Georg-Johann Lay :
>
> This patch calculates more accurate shift costs, but makes
> the costs for larger offsets no more expensive than the costs
> for an unrolled shift.
>
> Ok for trunk?

Ok. Please apply.

Denis.


Re: [PATCH] libgccjit: Add support for machine-dependent builtins

2024-11-19 Thread Mark Wielaard
Hi,

Random request...

On Tue, Nov 19, 2024 at 11:14:38AM -0500, David Malcolm wrote:
> > Here's the updated patch and answers below.
> > 
> > (GitHub link if you find it easier for review: 
> > https://github.com/antoyo/libgccjit/pull/5)
> > 
> > Thanks.
> 
> Thanks; I looked over the patch via the above link and it looks good to
> me for trunk.

Since we now have an experimental forge at https://forge.sourceware.org
would it be an idea to use that for such reviews?

We would love to get feedback on the forge idea (but ideally one based
on Free Software and under community control).

See for some more background:
https://gcc.gnu.org/wiki/ForgeExperiment

You could sign up with your gcc ids (antoyo@gcc... or dmalcolm@gcc...).

Please send requests for help, feedback (good or bad) to the forge
mailing list: https://sourceware.org/mailman/listinfo/forge (You don't
need to subscribe unless you want to be part of the forge community.)

Thanks,

Mark


Re: [PATCH] v2: Add support for nonnull_if_nonzero attribute [PR117023]

2024-11-19 Thread Joseph Myers
On Wed, 13 Nov 2024, Jakub Jelinek wrote:

> On Tue, Nov 12, 2024 at 06:34:39PM +0100, Jakub Jelinek wrote:
> > What do you think about this?  So far lightly tested.
> 
> Unfortunately bootstrap/regtest revealed some issues in the patch,
> the tree-ssa-ccp.cc changes break bootstrap because fntype in there
> may be NULL and that is what get_nonnull_args handles by just returning
> NULL, but obviously TYPE_ATTRIBUTES (fntype) can't be accessed, so I've
> added if (!fntype) continue;
> And the ubsan tests worked in C but not C++ due to extra warning, so I've
> adjusted them.
> 
> This has been successfully bootstrapped/regtested on x86_64-linux and
> i686-linux.

The c-family/ changes are OK.

-- 
Joseph S. Myers
josmy...@redhat.com



Re: Should -fsanitize=bounds support counted-by attribute for pointers inside a structure?

2024-11-19 Thread Martin Uecker
Am Dienstag, dem 19.11.2024 um 10:47 -0500 schrieb Marek Polacek:
> On Mon, Nov 18, 2024 at 07:10:35PM +0100, Martin Uecker wrote:
> > Am Montag, dem 18.11.2024 um 17:55 +0000 schrieb Qing Zhao:
> > > Hi,
> > > 
> > > I am working on extending “counted_by” attribute to pointers inside a 
> > > structure per our previous discussion. 
> > > 
> > > I need advice on the following question:
> > > 
> > > Should -fsanitize=bounds support array reference that was referenced 
> > > through a pointer that has counted_by attribute? 
> 
> I don't see why it couldn't, perhaps as part of -fsanitize=bounds-strict.
> Someone has to implement it, though.

I think Qing was volunteering to do this.  My point was that
this would not necessarily be undefined behavior, but instead
could trap for possibly defined behavior.  I would not mind, but
I point out that in the past people insisted that the sanitizers
are only intended to screen for undefined behavior.

>  
> > I think the question is what -fsanitize=bounds is meant to be.
> > 
> > I am a bit frustrated about the sanitizer.  On the
> > one hand, it is not doing enough to get spatial memory
> > safety even where this would be easily possible, on the
> > other hand, is pedantic about things which are technically
> > UB but not problematic and then one is prevented from
> > using it.
> > 
> > When used in default mode, where execution continues, it
> > also does not mix well with many warnings, creates more code,
> > and pulls in a library dependency (and the library also depends
> > on upstream choices / progress which seems a limitation for
> > extensions).
> > 
> > What IMHO would be ideal is a protection mode for spatial
> > memory safety that simply adds traps (which then requires
> > no library, has no issues with other warnings, and could
> > evolve independently from clang) 
> > 
> > So shouldn't we just add a -fboundscheck (which would 
> > be like -fsanitize=bounds -fsanitize-trap=bounds just with
> > more checking) and make it really good? I think many people
> > would be very happy about this.
> 
> That's a separate concern.  We already have the -fbounds-check option,
> currently only used in Fortran (and D?), so perhaps we could make
> that option a shorthand for -fsanitize=bounds -fsanitize-trap=bounds.

I think it could share large parts of the implementation, but the
main reason for having a separate option would be to do something
better than the sanitizer.  So it could not simply be a shorthand.

Martin





Re: [PATCH] libgccjit: Add vector permutation and vector access operations

2024-11-19 Thread David Malcolm
On Thu, 2023-11-30 at 17:16 -0500, Antoni Boucher wrote:
> All of these are fixed in this new patch.
> Thanks for the review.

Thanks for the updated patch.

I had said "OK with those fixed" on the older version and it looks like
you have indeed fixed the issues I noticed, so this updated patch is OK
for trunk.

Sorry for not clarifying earlier
Dave

> 
> On Mon, 2023-11-20 at 18:05 -0500, David Malcolm wrote:
> > On Fri, 2023-11-17 at 17:36 -0500, Antoni Boucher wrote:
> > > Hi.
> > > This patch adds a vector permutation and vector access operations
> > > (bug
> > > 112602).
> > > 
> > > This was split from this patch:
> > > https://gcc.gnu.org/pipermail/jit/2023q1/001606.html
> > > 
> > 
> > Thanks for the patch.
> > 
> > Overall, looks good, but 3 minor nitpicks:
> > 
> > [...snip...]
> > 
> > > diff --git a/gcc/jit/docs/topics/compatibility.rst
> > > b/gcc/jit/docs/topics/compatibility.rst
> > > index ebede440ee4..a764e3968d1 100644
> > > --- a/gcc/jit/docs/topics/compatibility.rst
> > > +++ b/gcc/jit/docs/topics/compatibility.rst
> > > @@ -378,3 +378,13 @@ alignment of a variable:
> > >  
> > >  ``LIBGCCJIT_ABI_25`` covers the addition of
> > >  :func:`gcc_jit_type_get_restrict`
> > > +
> > > +
> > > +.. _LIBGCCJIT_ABI_26:
> > > +
> > > +``LIBGCCJIT_ABI_26``
> > > +
> > > +``LIBGCCJIT_ABI_26`` covers the addition of functions to
> > > manipulate vectors:
> > > +
> > > +  * :func:`gcc_jit_context_new_rvalue_vector_perm`
> > > +  * :func:`gcc_jit_context_new_vector_access`
> > > diff --git a/gcc/jit/docs/topics/expressions.rst
> > > b/gcc/jit/docs/topics/expressions.rst
> > > index 42cfee36302..4a45aa13f5c 100644
> > > --- a/gcc/jit/docs/topics/expressions.rst
> > > +++ b/gcc/jit/docs/topics/expressions.rst
> > > @@ -295,6 +295,35 @@ Vector expressions
> > >  
> > >    #ifdef
> > > LIBGCCJIT_HAVE_gcc_jit_context_new_rvalue_from_vector
> > >  
> > > +.. function:: gcc_jit_rvalue * \
> > > +  gcc_jit_context_new_rvalue_vector_perm
> > > (gcc_jit_context *ctxt, \
> > > + 
> > > gcc_jit_location *loc, \
> > > + 
> > > gcc_jit_rvalue *elements1, \
> > > + 
> > > gcc_jit_rvalue *elements2, \
> > > + 
> > > gcc_jit_rvalue *mask);
> > > +
> > > +   Build a permutation of two vectors.
> > > +
> > > +   "elements1" and "elements2" should have the same type.
> > > +   The length of "mask" and "elements1" should be the same.
> > > +   The element type of "mask" should be integral.
> > > +   The size of the element type of "mask" and "elements1" should
> > > be the same.
> > > +
> > > +   This entrypoint was added in :ref:`LIBGCCJIT_ABI_25`; you can
> > > test for
> >    ^^
> > Should be 26
> > 
> > [...snip...]
> > 
> > >  Unary Operations
> > >  
> > >  
> > > @@ -1020,3 +1049,27 @@ Field access is provided separately for
> > > both
> > > lvalues and rvalues.
> > >    PTR[INDEX]
> > >  
> > >     in C (or, indeed, to ``PTR + INDEX``).
> > > +
> > > +.. function:: gcc_jit_lvalue *\
> > > +  gcc_jit_context_new_vector_access (gcc_jit_context
> > > *ctxt,\
> > > +
> > > gcc_jit_location
> > > *loc,\
> > > + gcc_jit_rvalue
> > > *vector,\
> > > + gcc_jit_rvalue
> > > *index)
> > > +
> > > +   Given an rvalue of vector type ``T __attribute__
> > > ((__vector_size__ (SIZE)))``, get the element `T` at
> > > +   the given index.
> > > +
> > > +   This entrypoint was added in :ref:`LIBGCCJIT_ABI_25`; you can
> > > test for
> >    ^^
> > 
> > Likewise here.
> > 
> > [...snip...]
> > 
> > > @@ -4071,6 +4107,79 @@ gcc_jit_context_new_rvalue_from_vector
> > > (gcc_jit_context *ctxt,
> > >   (gcc::jit::recording::rvalue **)elements);
> > >  }
> > >  
> > > +/* Public entrypoint.  See description in libgccjit.h.
> > > +
> > > +   After error-checking, the real work is done by the
> > > +   gcc::jit::recording::context::new_rvalue_vector_perm method,
> > > in
> > > +   jit-recording.cc.  */
> > > +
> > > +gcc_jit_rvalue *
> > > +gcc_jit_context_new_rvalue_vector_perm (gcc_jit_context *ctxt,
> > > + gcc_jit_location *loc,
> > > + gcc_jit_rvalue
> > > *elements1,
> > > + gcc_jit_rvalue
> > > *elements2,
> > > + gcc_jit_rvalue *mask)
> > > +{
> > > +  RETURN_NULL_IF_FAIL (ctxt, NULL, loc, "NULL ctxt");
> > > +  JIT_LOG_FUNC (ctxt->get_logger ());
> > > +
> > > +  /* LOC can be NULL.  */
> > 
> > ...but "elements1", "elements2", and "mask" must not be NULL, as
> > 

[patch,testsuite,applied] Skip 2 tests that are not int16 clean

2024-11-19 Thread Georg-Johann Lay

Skipping these test cases that are not int16 clean (execution fail,
UB due to shift offset, etc).

Johann

--

testsuite/52641 - Skip test cases that are not 16-bit clean.

gcc/testsuite/
PR testsuite/52641
PR testsuite/116488
PR testsuite/116915
* gcc.dg/torture/pr116488.c: Require int32plus.
* gcc.dg/torture/pr116915.c: Require int32plus.

commit 780720f04b0b83261d6073b92f3b02e8fbef41b9
Author: Georg-Johann Lay 
Date:   Tue Nov 19 19:32:24 2024 +0100

testsuite/52641 - Skip test cases that are not 16-bit clean.

gcc/testsuite/
PR testsuite/52641
PR testsuite/116488
PR testsuite/116915
* gcc.dg/torture/pr116488.c: Require int32plus.
* gcc.dg/torture/pr116915.c: Require int32plus.

diff --git a/gcc/testsuite/gcc.dg/torture/pr116488.c b/gcc/testsuite/gcc.dg/torture/pr116488.c
index 90457bb9315..b2af84fef43 100644
--- a/gcc/testsuite/gcc.dg/torture/pr116488.c
+++ b/gcc/testsuite/gcc.dg/torture/pr116488.c
@@ -1,5 +1,7 @@
 /* { dg-do run } */
 /* { dg-additional-options "-fno-forward-propagate" } */
+/* { dg-require-effective-target int32plus } */
+
 int a, b;
 signed char c, e;
 unsigned char d;
diff --git a/gcc/testsuite/gcc.dg/torture/pr116915.c b/gcc/testsuite/gcc.dg/torture/pr116915.c
index 9368113b364..f0ee4c0b7e4 100644
--- a/gcc/testsuite/gcc.dg/torture/pr116915.c
+++ b/gcc/testsuite/gcc.dg/torture/pr116915.c
@@ -1,4 +1,5 @@
 /* { dg-do run } */
+/* { dg-require-effective-target int32plus } */
 
 long a, b, *c = &b;
 short d, e;


Re: [PATCH] sibcall: Adjust BLKmode argument size for alignment padding

2024-11-19 Thread Richard Sandiford
"H.J. Lu"  writes:
> Adjust BLKmode argument size for parameter alignment for sibcall check.
>
> gcc/
>
> PR middle-end/117098
> * calls.cc (store_one_arg): Adjust BLKmode argument size for
> alignment padding for sibcall check.
>
> gcc/testsuite/
>
> PR middle-end/117098
> * gcc.dg/sibcall-12.c: New test.
>
> OK for master?
>
>
> H.J.
> From 8b0518906cb23a9b5e77b04d6132c49047daebd2 Mon Sep 17 00:00:00 2001
> From: "H.J. Lu" 
> Date: Sun, 13 Oct 2024 04:53:14 +0800
> Subject: [PATCH] sibcall: Adjust BLKmode argument size for alignment padding
>
> Adjust BLKmode argument size for parameter alignment for sibcall check.
>
> gcc/
>
>   PR middle-end/117098
>   * calls.cc (store_one_arg): Adjust BLKmode argument size for
>   alignment padding for sibcall check.
>
> gcc/testsuite/
>
>   PR middle-end/117098
>   * gcc.dg/sibcall-12.c: New test.
>
> Signed-off-by: H.J. Lu 
> ---
>  gcc/calls.cc  |  4 +++-
>  gcc/testsuite/gcc.dg/sibcall-12.c | 13 +
>  2 files changed, 16 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/sibcall-12.c
>
> diff --git a/gcc/calls.cc b/gcc/calls.cc
> index c5c26f65280..163c7e509d9 100644
> --- a/gcc/calls.cc
> +++ b/gcc/calls.cc
> @@ -5236,7 +5236,9 @@ store_one_arg (struct arg_data *arg, rtx argblock, int 
> flags,
> /* expand_call should ensure this.  */
> gcc_assert (!arg->locate.offset.var
> && arg->locate.size.var == 0);
> -   poly_int64 size_val = rtx_to_poly_int64 (size_rtx);
> +   /* Adjust for argument alignment padding.  */
> +   poly_int64 size_val = ROUND_UP (UINTVAL (size_rtx),
> +   parm_align / BITS_PER_UNIT);

This doesn't look right to me.  For one thing, going from
rtx_to_poly_int64 to UINTVAL drops support for non-constant parameters.
But even ignoring that, I think padding size_val (the size of arg->value
IIUC) will pessimise the later:

  else if (maybe_in_range_p (arg->locate.offset.constant,
 i, size_val))
sibcall_failure = true;

and so cause sibcall failures elsewhere.  I'm also not sure this
accurately reproduces the padding that is added by locate_and_pad_parm
for all cases (arguments that grow up vs down, padding below vs above
the argument).

AIUI, the point of the:

  if (known_eq (arg->locate.offset.constant, i))
{
  /* Even though they appear to be at the same location,
 if part of the outgoing argument is in registers,
 they aren't really at the same location.  Check for
 this by making sure that the incoming size is the
 same as the outgoing size.  */
  if (maybe_ne (arg->locate.size.constant, size_val))
sibcall_failure_1 = true;
}

that you cite in the PR is to make sure that the nth byte of arg->value
corresponds to arg->locate.offset.constant + n.  It's not clear to me
why the original fix for PR32602 didn't just check partial != 0.
Perhaps it was just for consistency with the neighbouring overlap check,
or maybe it was for some deeper reason.

Thanks,
Richard

>  
> if (known_eq (arg->locate.offset.constant, i))
>   {
> diff --git a/gcc/testsuite/gcc.dg/sibcall-12.c 
> b/gcc/testsuite/gcc.dg/sibcall-12.c
> new file mode 100644
> index 000..5773c9c1c4a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/sibcall-12.c
> @@ -0,0 +1,13 @@
> +// Test for sibcall optimization with struct aligned on stack.
> +// { dg-options "-O2" }
> +// { dg-final { scan-assembler "jmp" { target i?86-*-* x86_64-*-* } } }
> +
> +struct A { char a[17]; };
> +
> +int baz (int a, int b, int c, void *p, struct A s, struct A);
> +
> +int
> +foo (int a, int b, int c, void *p, struct A s, struct A s2)
> +{
> +  return baz (a, b, c, p, s, s2);
> +}


Re: [PATCH V1] RISC-V: Add the mini support for SiFive extensions.

2024-11-19 Thread Kito Cheng
Thanks, committed to trunk :)

On Wed, Nov 20, 2024 at 2:09 AM Jeff Law  wrote:
>
>
>
> On 11/17/24 2:55 AM, shiyul...@iscas.ac.cn wrote:
> > From: yulong 
> >
> > This patch adds the mini support for xsfvqmaccqoq, xsfvqmaccdod and
> >   xsfvfnrclipxfqf extensions.
> >
> > gcc/ChangeLog:
> >
> >  * common/config/riscv/riscv-common.cc: New.
> >  * config/riscv/riscv.opt: New.
> >
> > gcc/testsuite/ChangeLog:
> >
> >  * gcc.target/riscv/predef-sf-3.c: New test.
> >  * gcc.target/riscv/predef-sf-4.c: New test.
> >  * gcc.target/riscv/predef-sf-5.c: New test.
> Explicitly deferring to Kito on this one.
>
> Jeff
>


Re: [PATCH] PR target/117669 - RISC-V:The 'VEEWTRUNC4' iterator 'RVVMF2BF' type condition error

2024-11-19 Thread 钟居哲
LGTM



juzhe.zh...@rivai.ai
 
From: Feng Wang
Date: 2024-11-20 15:37
To: gcc-patches
CC: kito.cheng; jeffreyalaw; juzhe.zhong; Feng Wang
Subject: [PATCH] PR target/117669 - RISC-V:The 'VEEWTRUNC4' iterator 'RVVMF2BF' 
type condition error
This patch fixes the wrong condition for RVVMF2BF. It should be
TARGET_VECTOR_ELEN_BF_16.
gcc/ChangeLog:
 
PR target/117669
* config/riscv/vector-iterators.md:
 
Signed-off-by: Feng Wang 
---
gcc/config/riscv/vector-iterators.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
 
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 6a621459cc4..92cb651ce49 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -365,7 +365,7 @@
   (RVVM2BF "TARGET_VECTOR_ELEN_BF_16")
   (RVVM1BF "TARGET_VECTOR_ELEN_BF_16")
-  (RVVMF2BF "TARGET_VECTOR_ELEN_FP_16")
+  (RVVMF2BF "TARGET_VECTOR_ELEN_BF_16")
   (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32 && TARGET_64BIT")
   (RVVM2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_64BIT")
-- 
2.17.1
 
 


[committed] c: Fix ICE for integer constexpr initializers of wrong type [PR115515]

2024-11-19 Thread Joseph Myers
Bug 115515 (plus its duplicate 117139) reports an ICE with constexpr
initializer for an integer type variable that is not of integer type.
Fix this by not calling int_fits_type_p unless the previous check for
an integer constant expression passes.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

PR c/115515

gcc/c/
* c-typeck.cc (check_constexpr_init): Do not call int_fits_type_p
for arguments that are not integer constant expressions.

gcc/testsuite/
* gcc.dg/c23-constexpr-10.c, gcc.dg/gnu23-constexpr-2.c: New
tests.

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index a701dd090fb8..96e562c49046 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -8934,7 +8934,7 @@ check_constexpr_init (location_t loc, tree type, tree 
init,
   if (!int_const_expr)
error_at (loc, "%<constexpr%> integer initializer is not an "
  "integer constant expression");
-  if (!int_fits_type_p (init, type))
+  else if (!int_fits_type_p (init, type))
error_at (loc, "%<constexpr%> initializer not representable in "
  "type of object");
   return;
diff --git a/gcc/testsuite/gcc.dg/c23-constexpr-10.c 
b/gcc/testsuite/gcc.dg/c23-constexpr-10.c
new file mode 100644
index ..8ef2fced421d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c23-constexpr-10.c
@@ -0,0 +1,13 @@
+/* Test C23 constexpr.  Invalid types of integer initializers (bug 115515).  */
+/* { dg-do compile } */
+/* { dg-options "-std=c23 -pedantic-errors" } */
+
+struct s { float x; };
+const struct s i = { 3.1 };
+constexpr int j = i.x; /* { dg-error "constexpr' integer initializer is not an 
integer constant expression" } */
+
+constexpr struct s i2 = { 3.25f };
+constexpr int j2 = i2.x; /* { dg-error "constexpr' integer initializer is not 
an integer constant expression" } */
+
+constexpr int j3 = 2 * 2.5; /* { dg-error "constexpr' integer initializer is 
not an integer constant expression" } */
+constexpr int j4 = 5.0; /* { dg-error "constexpr' integer initializer is not 
an integer constant expression" } */
diff --git a/gcc/testsuite/gcc.dg/gnu23-constexpr-2.c 
b/gcc/testsuite/gcc.dg/gnu23-constexpr-2.c
new file mode 100644
index ..f570c2424f83
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gnu23-constexpr-2.c
@@ -0,0 +1,5 @@
+/* Test C23 constexpr.  Invalid types of integer initializers (bug 115515).  */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu23" } */
+
+constexpr int i = 5i; /* { dg-error "constexpr' integer initializer is not an 
integer constant expression" } */

-- 
Joseph S. Myers
josmy...@redhat.com



回复: Re: [PATCH] RISC-V:Fix wrong condition for vector-bfloat16

2024-11-19 Thread wangf...@eswincomputing.com
On 2024-11-20 14:54  钟居哲  wrote:
>
>Are you trying to fix this PR ? 
Yes.
>117669 – RISC-V:The 'VEEWTRUNC4' iterator 'RVVMF2BF' type condition error
>
>I think you should add PR target/117669 in the changelog
> 
OK.
>
>
>
>juzhe.zh...@rivai.ai
>
>From: Feng Wang
>Date: 2024-11-20 14:50
>To: gcc-patches
>CC: kito.cheng; jeffreyalaw; juzhe.zhong; Feng Wang
>Subject: [PATCH] RISC-V:Fix wrong condition for vector-bfloat16
>This patch fixes the wrong condition for RVVMF2BF. It should be
>TARGET_VECTOR_ELEN_BF_16.
>gcc/ChangeLog:
>
>* config/riscv/vector-iterators.md: Modify condition.
>
>Signed-off-by: Feng Wang 
>---
>gcc/config/riscv/vector-iterators.md | 2 +-
>1 file changed, 1 insertion(+), 1 deletion(-)
>
>diff --git a/gcc/config/riscv/vector-iterators.md 
>b/gcc/config/riscv/vector-iterators.md
>index 6a621459cc4..92cb651ce49 100644
>--- a/gcc/config/riscv/vector-iterators.md
>+++ b/gcc/config/riscv/vector-iterators.md
>@@ -365,7 +365,7 @@
>   (RVVM2BF "TARGET_VECTOR_ELEN_BF_16")
>   (RVVM1BF "TARGET_VECTOR_ELEN_BF_16")
>-  (RVVMF2BF "TARGET_VECTOR_ELEN_FP_16")
>+  (RVVMF2BF "TARGET_VECTOR_ELEN_BF_16")
>   (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32 && 
>TARGET_64BIT")
>   (RVVM2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_64BIT")
>--
>2.17.1
>
>

Re: [PATCH] libgccjit: Add type checks in gcc_jit_block_add_assignment_op

2024-11-19 Thread David Malcolm
On Thu, 2023-12-21 at 08:36 -0500, Antoni Boucher wrote:
> Hi.
> Here's the updated patch.
> Thanks.

Sorry for the delay in responding.

The updated patch is good for trunk - thanks!

Dave

> 
> On Thu, 2023-12-07 at 20:15 -0500, David Malcolm wrote:
> > On Thu, 2023-12-07 at 17:34 -0500, Antoni Boucher wrote:
> > > Hi.
> > > > This patch adds checks to gcc_jit_block_add_assignment_op to make
> > > sure
> > > it
> > > is only ever called on numeric types.
> > > 
> > > With the previous patch, this might require a change to also
> > > allow
> > > vector types here.
> > > 
> > > Thanks for the review.
> > 
> > Thanks for the patch.
> > 
> > [...snip...]
> > 
> > > @@ -2890,6 +2900,17 @@ gcc_jit_block_add_assignment_op
> > > (gcc_jit_block *block,
> > >  lvalue->get_type ()->get_debug_string (),
> > >  rvalue->get_debug_string (),
> > >  rvalue->get_type ()->get_debug_string ());
> > > +  // TODO: check if it is a numeric vector?
> > > +  RETURN_IF_FAIL_PRINTF3 (
> > > +    lvalue->get_type ()->is_numeric () && rvalue->get_type ()-
> > > > is_numeric (), ctxt, loc,
> > > +    "gcc_jit_block_add_assignment_op %s has non-numeric lvalue
> > > %s
> > > (type: %s)",
> > > +    gcc::jit::binary_op_reproducer_strings[op],
> > > +    lvalue->get_debug_string (), lvalue->get_type ()-
> > > > get_debug_string ());
> > 
> > The condition being tested here should probably just be:
> > 
> >    lvalue->get_type ()->is_numeric ()
> > 
> > since otherwise if the lvalue's type is numeric and the rvalue's
> > type
> > fails to be, then the user would incorrectly get a message about
> > the
> > lvalue.
> > 
> > > +  RETURN_IF_FAIL_PRINTF3 (
> > > +    rvalue->get_type ()->is_numeric () && rvalue->get_type ()-
> > > > is_numeric (), ctxt, loc,
> > > +    "gcc_jit_block_add_assignment_op %s has non-numeric rvalue
> > > %s
> > > (type: %s)",
> > > +    gcc::jit::binary_op_reproducer_strings[op],
> > > +    rvalue->get_debug_string (), rvalue->get_type ()-
> > > > get_debug_string ());
> > 
> > The condition being tested here seems to have a redundant repeated:
> >   && rvalue->get_type ()->is_numeric ()
> > 
> > Am I missing something, or is that a typo?
> > 
> > [...snip...]
> > 
> > The patch is OK otherwise.
> > 
> > Thanks
> > Dave
> > 
> > 
> > 
> 



Re: [PATCH v2] Fix MV clones can not redirect to specific target on some targets

2024-11-19 Thread Jeff Law




On 11/19/24 10:29 AM, Andrew Carlotti wrote:

On Sun, Oct 27, 2024 at 04:00:43PM +0000, Yangyu Chen wrote:

Following the implementation of commit b8ce8129a5 ("Redirect call
within specific target attribute among MV clones (PR ipa/82625)"),
we can now optimize calls by invoking a versioned function callee
from a caller that shares the same target attribute. However, on
targets that define TARGET_HAS_FMV_TARGET_ATTRIBUTE to zero, meaning
they use the "target_version" attribute instead of "target", this
optimization is not feasible. Currently, the only target affected
by this limitation is AArch64.


The existing optimization can pick the wrong version in some cases, and fixing
this properly requires better comparisons than just a simple string comparison.
I'd prefer to just disable this optimization for aarch64 and riscv for now (and
backport that fix to gcc-14), and add the necessary target hooks to be able to
implement it properly at a later date.  (The existing bug applies if you
specify both target and target_version/target_clones attributes on the same
function, which is an unlikely combination but one that we deliberately chose
to support in aarch64).





To give a specific example, suppose we have target features featv3, featv2 and
featv1, with featv3 implying featv2 implying featv1.  Suppose we have the
following function versions:

Caller: featv2, featv1, default
Callee: featv3, featv2, featv1, default

In the featv1 and default versions of the caller, we know that we would always
select the corresponding version of the callee function, so the redirection is
valid there.

However, in the featv2 version of the caller, we don't know whether we would
select the featv2 or the featv3 versions of the callee at runtime, so we cannot
eliminate the runtime indirection.
Conservatively we could direct to featv2, ie a direct match.  Directing 
to featv3 would have to be driven by a target hook or some mechanism for 
generically handling sub/super sets.


Jeff


[PATCH] PR target/117669 - RISC-V:The 'VEEWTRUNC4' iterator 'RVVMF2BF' type condition error

2024-11-19 Thread Feng Wang
This patch fixes the wrong condition for RVVMF2BF. It should be
TARGET_VECTOR_ELEN_BF_16.
gcc/ChangeLog:

PR target/117669
* config/riscv/vector-iterators.md:

Signed-off-by: Feng Wang 
---
 gcc/config/riscv/vector-iterators.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 6a621459cc4..92cb651ce49 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -365,7 +365,7 @@
 
   (RVVM2BF "TARGET_VECTOR_ELEN_BF_16")
   (RVVM1BF "TARGET_VECTOR_ELEN_BF_16")
-  (RVVMF2BF "TARGET_VECTOR_ELEN_FP_16")
+  (RVVMF2BF "TARGET_VECTOR_ELEN_BF_16")
   (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32 && TARGET_64BIT")
 
   (RVVM2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_64BIT")
-- 
2.17.1



Re: [PATCH] sibcall: Adjust BLKmode argument size for alignment padding

2024-11-19 Thread H.J. Lu
On Wed, Nov 20, 2024 at 2:12 AM Richard Sandiford
 wrote:
>
> "H.J. Lu"  writes:
> > Adjust BLKmode argument size for parameter alignment for sibcall check.
> >
> > gcc/
> >
> > PR middle-end/117098
> > * calls.cc (store_one_arg): Adjust BLKmode argument size for
> > alignment padding for sibcall check.
> >
> > gcc/testsuite/
> >
> > PR middle-end/117098
> > * gcc.dg/sibcall-12.c: New test.
> >
> > OK for master?
> >
> >
> > H.J.
> > From 8b0518906cb23a9b5e77b04d6132c49047daebd2 Mon Sep 17 00:00:00 2001
> > From: "H.J. Lu" 
> > Date: Sun, 13 Oct 2024 04:53:14 +0800
> > Subject: [PATCH] sibcall: Adjust BLKmode argument size for alignment padding
> >
> > Adjust BLKmode argument size for parameter alignment for sibcall check.
> >
> > gcc/
> >
> >   PR middle-end/117098
> >   * calls.cc (store_one_arg): Adjust BLKmode argument size for
> >   alignment padding for sibcall check.
> >
> > gcc/testsuite/
> >
> >   PR middle-end/117098
> >   * gcc.dg/sibcall-12.c: New test.
> >
> > Signed-off-by: H.J. Lu 
> > ---
> >  gcc/calls.cc  |  4 +++-
> >  gcc/testsuite/gcc.dg/sibcall-12.c | 13 +
> >  2 files changed, 16 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/sibcall-12.c
> >
> > diff --git a/gcc/calls.cc b/gcc/calls.cc
> > index c5c26f65280..163c7e509d9 100644
> > --- a/gcc/calls.cc
> > +++ b/gcc/calls.cc
> > @@ -5236,7 +5236,9 @@ store_one_arg (struct arg_data *arg, rtx argblock, 
> > int flags,
> > /* expand_call should ensure this.  */
> > gcc_assert (!arg->locate.offset.var
> > && arg->locate.size.var == 0);
> > -   poly_int64 size_val = rtx_to_poly_int64 (size_rtx);
> > +   /* Adjust for argument alignment padding.  */
> > +   poly_int64 size_val = ROUND_UP (UINTVAL (size_rtx),
> > +   parm_align / BITS_PER_UNIT);
>
> This doesn't look right to me.  For one thing, going from
> rtx_to_poly_int64 to UINTVAL drops support for non-constant parameters.
> But even ignoring that, I think padding size_val (the size of arg->value
> IIUC) will pessimise the later:
>
>   else if (maybe_in_range_p (arg->locate.offset.constant,
>  i, size_val))
> sibcall_failure = true;
>
> and so cause sibcall failures elsewhere.  I'm also not sure this
> accurately reproduces the padding that is added by locate_and_pad_parm
> for all cases (arguments that grow up vs down, padding below vs above
> the argument).
>
> AIUI, the point of the:
>
>   if (known_eq (arg->locate.offset.constant, i))
> {
>   /* Even though they appear to be at the same location,
>  if part of the outgoing argument is in registers,
>  they aren't really at the same location.  Check for
>  this by making sure that the incoming size is the
>  same as the outgoing size.  */
>   if (maybe_ne (arg->locate.size.constant, size_val))
> sibcall_failure_1 = true;
> }

Does this

diff --git a/gcc/calls.cc b/gcc/calls.cc
index 246abe34243..98429cc757f 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -5327,7 +5327,13 @@ store_one_arg (struct arg_data *arg, rtx
argblock, int flags,
   they aren't really at the same location.  Check for
   this by making sure that the incoming size is the
   same as the outgoing size.  */
-   if (maybe_ne (arg->locate.size.constant, size_val))
+   poly_int64 aligned_size;
+   if (CONST_INT_P (size_rtx))
+ aligned_size = ROUND_UP (UINTVAL (size_rtx),
+   parm_align / BITS_PER_UNIT);
+   else
+ aligned_size = size_val;
+   if (maybe_ne (arg->locate.size.constant, aligned_size))
  sibcall_failure = true;
  }
 else if (maybe_in_range_p (arg->locate.offset.constant,

look correct?

Thanks.

> that you cite in the PR is to make sure that the nth byte of arg->value
> corresponds to arg->locate.offset.constant + n.  It's not clear to me
> why the original fix for PR32602 didn't just check partial != 0.
> Perhaps it was just for consistency with the neighbouring overlap check,
> or maybe it was for some deeper reason.
>
> Thanks,
> Richard
>
> >
> > if (known_eq (arg->locate.offset.constant, i))
> >   {
> > diff --git a/gcc/testsuite/gcc.dg/sibcall-12.c 
> > b/gcc/testsuite/gcc.dg/sibcall-12.c
> > new file mode 100644
> > index 000..5773c9c1c4a
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/sibcall-12.c
> > @@ -0,0 +1,13 @@
> > +// Test for sibcall optimization with struct aligned on stack.
> > +// { dg-options "-O2" }
> > +// { dg-final { scan-assembler "jmp" { target i?86-*-* x86_64-*-* } } }
> > +
> > +struct A { char a[17]; };
> > +
> > +int baz (int a, int b, int c, void *p, struct A s, struct A);

Re: [PATCH] libgccjit: Fix float playback for cross-compilation

2024-11-19 Thread David Malcolm
On Thu, 2024-01-25 at 16:04 -0500, Antoni Boucher wrote:
> Thanks for the review!
> On Wed, 2024-01-24 at 13:10 -0500, David Malcolm wrote:
> > On Thu, 2024-01-11 at 18:42 -0500, Antoni Boucher wrote:
> > > Hi.
> > > This patch fixes the bug 113343.
> > > I'm wondering if there's a better solution than using mpfr.
> > > The only other solution I found is real_from_string, but that
> > > seems
> > > overkill to convert the number to a string.
> > > I could not find a better way to create a real value from a host
> > > double.
> > 
> > I took a look, and I don't see a better way; it seems weird to go
> > through a string stage.  Ideally there would be a
> > real_from_host_double, but I don't see one.

Sorry for the delay in responding 

> > 
> > Is there a cross-platform way to directly access the representation
> > of
> > a host double?
> 
> I have no idea.

Neither do I, but presumably this patch is fixing a real problem (no
pun intended)...

> 
> > 
> > > If there's no solution, do we lose some precision by using mpfr?
> > > Running Rust's core library tests, there was a difference of one
> > > decimal, so I'm wondering if there's some lost precision, or if
> > > it's
> > > just because those tests don't work on m68k which was my test
> > > target.
> > 
> > Sorry, can you clarify what you mean by "a difference of one
> > decimal"
> > above?
> 
> Let's say the Rust core tests expected the value "1.23456789", it
> instead got the value "1.2345678" (e.g. without the last decimal).
> Not sure if this is expected.
> Everything works fine for x86-64; this just happened for m68k which
> is
> not well supported for now in Rust, so that might just be that the
> test
> doesn't work on this platform.
> 
> > 
> > > Also, I'm not sure how to write a test for this fix. Any ideas?
> > 
> > I think we don't need cross-compilation-specific tests, we should
> > just
> > use and/or extend the existing coverage for
> > gcc_jit_context_new_rvalue_from_double e.g. in test-constants.c and
> > test-types.c
> > 
> > We probably should have test coverage for "awkward" values; we
> > already
> > have coverage for DBL_MIN and DBL_MAX, but we don't yet have test
> > coverage for:
> > * quiet/signaling NaN
> > * +ve/-ve inf
> > * -ve zero

...and I'm guessing you've tested this code on all of the various
configurations you're targeting.

Assuming that, this patch is good for trunk.

> 
> Is this something you would want for this patch?

No, that's just for bonus points :)


Thanks
Dave



RE: [PATCH v1 1/2] RISC-V: Fix incorrect optimization options passing to strided ld/st test

2024-11-19 Thread Li, Pan2
> Had the discussion above been included in the patch I probably would 
> have just acked it then :-)  Now that I understand what you're doing, 
> it's fine.

I see, thank you ;).

Pan

-Original Message-
From: Jeff Law  
Sent: Tuesday, November 19, 2024 10:57 PM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; rdapp@gmail.com
Subject: Re: [PATCH v1 1/2] RISC-V: Fix incorrect optimization options passing 
to strided ld/st test



On 11/19/24 7:19 AM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> So if this change is the right one to make for the strided subdirectory,
>> then shouldn't it also be correct to make for the gather-scatter
>> subdirectory as well?
> 
>> And similarly for various other instances where we call dg-runtest in
>> that file.
> 
> Yes, all "" "$op" of rvv.exp need to change to "$op" "" if we would like to
> test all sorts of rvv option combinations.
> 
>> Basically I'd like to see some explanation why this is the right patch
>> to make and why this case needs to be handled different from every other
>> one that I see in that file.  Assuming that explanation makes sense,
>> then some kind of comment in this file indicating why this case is
>> different seems in order.
> 
> But if we make all those changes together, there will be lots of increased 
> failure cases.
> Thus, I prefer to fix it one by one (like strided, then gather ... etc), to 
> make sure the patch
> could be friendly for review, as well as avoid any new failures of rvv.exp 
> anonymously.
> 
> Is there any best practice for such kind of changes ?
I think adjusting them one by one or in sensible groupings is fine.

Had the discussion above been included in the patch I probably would 
have just acked it then :-)  Now that I understand what you're doing, 
it's fine.

Jeff


[patch,avr] PR54378 Reconsider the default shift costs.

2024-11-19 Thread Georg-Johann Lay

This patch calculates more accurate shift costs, but makes
the costs for larger offsets no more expensive than the costs
for an unrolled shift.

Ok for trunk?

Johann

--

AVR: target/54378 - Reconsider the default shift costs.

This patch calculates more accurate shift costs, but makes
the costs for larger offsets no more expensive than the costs
for an unrolled shift.

gcc/
PR target/54378
* config/avr/avr.cc (avr_default_shift_costs): New static function.
(avr_rtx_costs_1) [ASHIFT, LSHIFTRT, ASHIFTRT]: Use it
to determine the default shift costs for shifts with a
constant shift offset.

AVR: target/54378 - Reconsider the default shift costs.

This patch calculates more accurate shift costs, but makes
the costs for larger offsets no more expensive than the costs
for an unrolled shift.

gcc/
PR target/54378
* config/avr/avr.cc (avr_default_shift_costs): New static function.
(avr_rtx_costs_1) [ASHIFT, LSHIFTRT, ASHIFTRT]: Use it
to determine the default shift costs for shifts with a
constant shift offset.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 8fab896b70f..245c579a075 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -12028,6 +12028,24 @@ avr_operand_rtx_cost (rtx x, machine_mode mode, rtx_code outer,
   return total;
 }
 
+
+/* Return the default shift costs for an n-byte shift with a constant
+   bit offset in terms of cycles (speed) or in terms of words (!speed).  */
+
+static int
+avr_default_shift_costs (int n_bytes, int offset, bool speed)
+{
+  int c_space = 3 + n_bytes;
+  int c_speed = offset <= 4
+? (3 + n_bytes) * offset
+// For larger offsets, don't make the speed costs more costly than
+// an unrolled shift, because we cannot rollback from an unrolled shift.
+: n_bytes * offset;
+
+  return COSTS_N_INSNS (speed ? c_speed : c_space);
+}
+
+
 /* Worker function for AVR backend's rtx_cost function.
X is rtx expression whose cost is to be calculated.
Return true if the complete cost has been computed.
@@ -12038,8 +12056,11 @@ static bool
 avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 		 int /*opno*/, int *total, bool speed)
 {
-  rtx_code code = GET_CODE (x);
-  HOST_WIDE_INT val;
+  const rtx_code code = GET_CODE (x);
+  const int n_bytes = GET_MODE_SIZE (mode);
+  const HOST_WIDE_INT val1 = BINARY_P (x) && CONST_INT_P (XEXP (x, 1))
+? INTVAL (XEXP (x, 1))
+: -1;
 
   switch (code)
 {
@@ -12054,7 +12075,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
   return true;
 
 case MEM:
-  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
+  *total = COSTS_N_INSNS (n_bytes);
   return true;
 
 case NEG:
@@ -12068,7 +12089,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 	case E_HImode:
 	case E_PSImode:
 	case E_SImode:
-	  *total = COSTS_N_INSNS (2 * GET_MODE_SIZE (mode) - 1);
+	  *total = COSTS_N_INSNS (2 * n_bytes - 1);
 	  break;
 
 	default:
@@ -12092,19 +12113,19 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
   return true;
 
 case NOT:
-  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
+  *total = COSTS_N_INSNS (n_bytes);
   *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   return true;
 
 case ZERO_EXTEND:
-  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
+  *total = COSTS_N_INSNS (n_bytes
 			  - GET_MODE_SIZE (GET_MODE (XEXP (x, 0;
   *total += avr_operand_rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
   code, 0, speed);
   return true;
 
 case SIGN_EXTEND:
-  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) + 2
+  *total = COSTS_N_INSNS (n_bytes + 2
 			  - GET_MODE_SIZE (GET_MODE (XEXP (x, 0;
   *total += avr_operand_rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
   code, 0, speed);
@@ -12144,13 +12165,13 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
   if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
 	  && REG_P (XEXP (x, 1)))
 	{
-	  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
+	  *total = COSTS_N_INSNS (n_bytes);
 	  return true;
 	}
   if (REG_P (XEXP (x, 0))
 	  && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
 	{
-	  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
+	  *total = COSTS_N_INSNS (n_bytes);
 	  return true;
 	}
 
@@ -12159,8 +12180,8 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 	  && REG_P (XEXP (x, 1)))
 	{
 	  int size2 = GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0)));
-	  *total = COSTS_N_INSNS (2 + GET_MODE_SIZE (mode)
-  + (GET_MODE_SIZE (mode) > 1 + size2));
+	  *total = COSTS_N_INSNS (2 + n_bytes
+  + (n_bytes > 1 + size2));
 	  return true;
 	}
 
@@ -12249,7 +12270,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
   if (REG_P (XEXP (x, 0))
 	  && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
 	{
-	  *total = COSTS_N_INSNS (GET_

Re: [PATCH 07/17] testsuite: arm: Use effective-target for {gcc, g++}.target/arm/ tests

2024-11-19 Thread Richard Earnshaw (lists)
On 19/11/2024 10:23, Torbjörn SVENSSON wrote:
> Update test cases to use -mcpu=unset/-march=unset feature introduced in
> r15-3606-g7d6c6a0d15c.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.target/arm/pr103676.C: Use effective-target
>   arm_cpu_cortex_m7.
>   * gcc.target/arm/no-volatile-in-it.c: Likewise.
>   * gcc.target/arm/fma-sp.c: Use effective-target
>   arm_cpu_cortex_m4.
>   * gcc.target/arm/pr53859.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/pr97327.c: Use effective-target
>   arm_cpu_cortex_m55.
>   * gcc.target/arm/pr65067.c: Use effective-target
>   arm_cpu_cortex_m3.
>   * lib/target-supports.exp: Define effective-target
>   arm_cpu_cortex_m3, arm_cpu_cortex_m4, arm_cpu_cortex_m7 and
>   arm_cpu_cortex_m55.
> 
> Signed-off-by: Torbjörn SVENSSON 
> ---
>  gcc/testsuite/g++.target/arm/pr103676.C   | 5 -
>  gcc/testsuite/gcc.target/arm/fma-sp.c | 8 +++-
>  gcc/testsuite/gcc.target/arm/mve/intrinsics/pr97327.c | 7 ---
>  gcc/testsuite/gcc.target/arm/no-volatile-in-it.c  | 5 +++--
>  gcc/testsuite/gcc.target/arm/pr53859.c| 4 +++-
>  gcc/testsuite/gcc.target/arm/pr65067.c| 4 +++-
>  gcc/testsuite/lib/target-supports.exp | 4 
>  7 files changed, 24 insertions(+), 13 deletions(-)
> 
> diff --git a/gcc/testsuite/g++.target/arm/pr103676.C 
> b/gcc/testsuite/g++.target/arm/pr103676.C
> index 1607564ff5d..1e5711626f8 100644
> --- a/gcc/testsuite/g++.target/arm/pr103676.C
> +++ b/gcc/testsuite/g++.target/arm/pr103676.C
> @@ -1,6 +1,9 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target arm_cpu_cortex_m7_ok } */
>  /* { dg-require-effective-target arm_thumb1_ok } */

This shouldn't be needed.

> -/* { dg-additional-options "-mcpu=cortex-m7 -mthumb -O2" }  */
> +/* { dg-additional-options "-O2" }  */
> +/* { dg-add-options arm_cpu_cortex_m7 } */
> +
>  
>  typedef unsigned long long uint64_t;
>  struct timer {
> diff --git a/gcc/testsuite/gcc.target/arm/fma-sp.c 
> b/gcc/testsuite/gcc.target/arm/fma-sp.c
> index e1884545f0d..539628d5d26 100644
> --- a/gcc/testsuite/gcc.target/arm/fma-sp.c
> +++ b/gcc/testsuite/gcc.target/arm/fma-sp.c
> @@ -1,9 +1,7 @@
>  /* { dg-do compile } */
> -/* { dg-skip-if "avoid conflicts with multilib options" { ! arm_thumb2_ok } 
> { "-march=*" } { "" } } */
> -/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { 
> "-mcpu=*" } { "-mcpu=cortex-m4" } } */
> -/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { 
> "-mfpu=*" } { "-mfpu=fpv4-sp-d16" } } */
> -/* { dg-skip-if "avoid conflicts with multilib options" { *-*-* } { 
> "-mfloat-abi=*" } { "-mfloat-abi=hard" } } */
> -/* { dg-options "-O2 -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mthumb 
> -mfloat-abi=hard" } */
> +/* { dg-require-effective-target arm_cpu_cortex_m4_ok } */
> +/* { dg-options "-O2 -mfpu=fpv4-sp-d16 -mfloat-abi=hard" } */
> +/* { dg-add-options arm_cpu_cortex_m4 } */

I'd add an arm_cpu_cortex_m4_hard entry for this (see below as well); then you 
only need "-O2" in dg-options.

>  
>  #include "fma.h"
>  
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr97327.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr97327.c
> index d19bde59266..34b7af6022b 100644
> --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr97327.c
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr97327.c
> @@ -1,6 +1,7 @@
> -/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> -/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=hard" } { 
> "" } } */
> -/* { dg-additional-options "-mcpu=cortex-m55 -mthumb -mfloat-abi=soft 
> -mfpu=auto -Werror" } */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_cpu_cortex_m55_ok } */
> +/* { dg-additional-options "-mfloat-abi=soft -mfpu=auto -Werror" } */
> +/* { dg-add-options arm_cpu_cortex_m55 } */

This test is a bit tricky.  The original bug report talks of a false warning 
when cortex-m55, armv8.1-m.main+mve and soft ABI were used together, though I 
think -mcpu=cortex-m55 -mfloat-abi=soft was enough.  I think it should be 
enough if arm_cpu_cortex_m55 uses -mfpu=auto so you don't need it in 
dg-additional-options, but you do need an explicit "-mfloat-abi=soft -Werror": 
I'd put these *after* the dg-add-options, though, just to be on the safe side.


>  
>  int main ()
>  {
> diff --git a/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c 
> b/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c
> index 6f3664d3b3d..119e9ecf321 100644
> --- a/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c
> +++ b/gcc/testsuite/gcc.target/arm/no-volatile-in-it.c
> @@ -1,7 +1,8 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target arm_cpu_cortex_m7_ok } */
>  /* { dg-require-effective-target arm_thumb2_ok } */
> -/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-march=*" "-mcpu=*" } { 
> "-mcpu=cortex-m7" } } */
> -/* { dg-options "-Os -mthumb -mcpu=cortex-m7" } */

[patch,avr] Use more bool

2024-11-19 Thread Georg-Johann Lay

Now that the C default is C23, we can use bool in avr.h
(which is still used in libgcc via tm.h).
bool is a keyword in C23, so no stdbool.h is required in libgcc.

No regressions. Ok for trunk?

Johann

--

AVR: Use more bool.

Now that the C default is C23, we can use bool in avr.h
(which is still used in libgcc via tm.h).
bool is a keyword in C23, so no stdbool.h is required in libgcc.

gcc/
* config/avr/avr.h (avr_args.has_stack_args): Be a bool.
(struct machine_function) : Same.
* config/avr/avr-protos.h (reg_unused_after)
(test_hard_reg_class, jump_over_one_insn_p): Use bool as
return type.
* config/avr/avr.cc (reg_unused_after)
(test_hard_reg_class, jump_over_one_insn_p): Same.
(cfun->machine->attributes_checked_p, cum->has_stack_args)
(cfun->machine->use_L__stack_usage, cfun->machine->gasisr.yes)
(cfun->machine->sibcall_fails): Use like a bool.

AVR: Use more bool.

Now that the C default is C23, we can use bool in avr.h
(which is still used in libgcc via tm.h).
bool is a keyword in C23, so no stdbool.h is required in libgcc.

gcc/
* config/avr/avr.h (avr_args.has_stack_args): Be a bool.
(struct machine_function) : Same.
* config/avr/avr-protos.h (reg_unused_after)
(test_hard_reg_class, jump_over_one_insn_p): Use bool as
return type.
* config/avr/avr.cc (reg_unused_after)
(test_hard_reg_class, jump_over_one_insn_p): Same.
(cfun->machine->attributes_checked_p, cum->has_stack_args)
(cfun->machine->use_L__stack_usage, cfun->machine->gasisr.yes)
(cfun->machine->sibcall_fails): Use like a bool.

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index ae3cc62d038..d316e0182a2 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -127,10 +127,10 @@ extern const char* avr_out_reload_inpsi (rtx*, rtx, int*);
 extern const char* avr_out_lpm (rtx_insn *, rtx*, int*);
 extern const char* avr_out_cmp_lsr (rtx_insn *, rtx*, int*);
 extern void avr_maybe_cmp_lsr (rtx *);
-extern int reg_unused_after (rtx_insn *insn, rtx reg);
+extern bool reg_unused_after (rtx_insn *insn, rtx reg);
 extern int avr_jump_mode (rtx x, rtx_insn *insn, int = 0);
-extern int test_hard_reg_class (enum reg_class rclass, rtx x);
-extern int jump_over_one_insn_p (rtx_insn *insn, rtx dest);
+extern bool test_hard_reg_class (enum reg_class rclass, rtx x);
+extern bool jump_over_one_insn_p (rtx_insn *insn, rtx dest);
 
 extern void avr_final_prescan_insn (rtx_insn *insn, rtx *operand,
 int num_operands);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 8fab896b70f..0a9d26af075 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -920,7 +920,7 @@ avr_set_current_function (tree decl)
 
   /* Don't print the above diagnostics more than once.  */
 
-  cfun->machine->attributes_checked_p = 1;
+  cfun->machine->attributes_checked_p = true;
 }
 
 
@@ -973,7 +973,7 @@ static int
 avr_regs_to_save (HARD_REG_SET *set)
 {
   int count = 0;
-  int int_or_sig_p = cfun->machine->is_interrupt || cfun->machine->is_signal;
+  bool int_or_sig_p = cfun->machine->is_interrupt || cfun->machine->is_signal;
 
   if (set)
 CLEAR_HARD_REG_SET (*set);
@@ -1138,7 +1138,7 @@ avr_return_addr_rtx (int count, rtx tem)
   else
 r = gen_rtx_SYMBOL_REF (Pmode, ".L__stack_usage+1");
 
-  cfun->machine->use_L__stack_usage = 1;
+  cfun->machine->use_L__stack_usage = true;
 
   r = gen_rtx_PLUS (Pmode, tem, r);
   r = gen_frame_mem (Pmode, memory_address (Pmode, r));
@@ -1637,7 +1637,7 @@ avr_expand_prologue (void)
 	 ZERO_REG and TMP_REG and one additional, optional register for
 	 us in an optimal way.  This even scans through inline asm.  */
 
-	  cfun->machine->gasisr.yes = 1;
+	  cfun->machine->gasisr.yes = true;
 
 	  // The optional reg or TMP_REG if we don't need one.  If we need one,
 	  // remove that reg from SET so that it's not puhed / popped twice.
@@ -2992,13 +2992,13 @@ avr_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx libname,
 {
   cum->nregs = AVR_TINY ? 1 + REG_25 - REG_20 : 1 + REG_25 - REG_8;
   cum->regno = FIRST_CUM_REG;
-  cum->has_stack_args = 0;
+  cum->has_stack_args = false;
   if (!libname && stdarg_p (fntype))
 cum->nregs = 0;
 
   /* Assume the calle may be tail called */
 
-  cfun->machine->sibcall_fails = 0;
+  cfun->machine->sibcall_fails = false;
 }
 
 
@@ -3031,7 +3031,7 @@ avr_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   if (cum->nregs && bytes <= cum->nregs)
 return gen_rtx_REG (arg.mode, cum->regno - bytes);
 
-  cum->has_stack_args = 1;
+  cum->has_stack_args = true;
 
   return NULL_RTX;
 }
@@ -3065,7 +3065,7 @@ avr_function_arg_advance (cumulative_args_t cum_v, const function_arg_info &arg)
 	 pass &args_so_far, too.  At present, CUMULATIVE_ARGS is target
 	 dependent so 

Re: [PATCH 01/17] testsuite: arm: Use effective-target for bti* and pac* tests

2024-11-19 Thread Richard Earnshaw (lists)
On 19/11/2024 10:23, Torbjörn SVENSSON wrote:
> Update test cases to use -mcpu=unset/-march=unset feature introduced in
> r15-3606-g7d6c6a0d15c.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/pac-1.c: Use effective-target
>   arm_arch_v8_1m_main_pacbti.
>   * gcc.target/arm/pac-1.c: Likewise.
>   * gcc.target/arm/pac-2.c: Likewise.
>   * gcc.target/arm/pac-3.c: Likewise.
>   * gcc.target/arm/pac-4.c: Likewise.
>   * gcc.target/arm/pac-5.c: Likewise.
>   * gcc.target/arm/pac-7.c: Likewise.
>   * gcc.target/arm/pac-8.c: Likewise.
>   * gcc.target/arm/pac-9.c: Likewise.
>   * gcc.target/arm/pac-10.c: Likewise.
>   * gcc.target/arm/pac-11.c: Likewise.
>   * gcc.target/arm/pac-sibcall.c: Likewise.
>   * gcc.target/arm/pac-sibcall-2.c: Likewise.
>   * gcc.target/arm/pac-sibcall-3.c: Likewise.
>   * gcc.target/arm/pac-12.c: Added option "-mcpu=unset".
>   * gcc.target/arm/pac-13.c: Likewise.
>   * gcc.target/arm/pac-14.c: Likewise.
>   * lib/target-supports.exp(check_effective_target_arm_pacbti_hw):

Space before '('.

>   Likewise.
>   * gcc.target/arm/pac-6.c: Use effective-target
>   arm_arch_v8_1m_main.
>   * gcc.target/arm/pac-15.c: Use effective-target
>   arm_arch_v8_1m_main_pacbti and added option "-mcpu=unset".

This is OK.  I think there are some general structural issues with the 
framework for these tests[1], but this is an incremental improvement.

R.

[1] Eg we have "dg-do run" with a hardware check and scan-assembler in the same 
test.

> 
> Signed-off-by: Torbjörn SVENSSON 
> Co-authored-by: Yvan ROUX 
> ---
>  gcc/testsuite/gcc.target/arm/pac-1.c  | 4 +++-
>  gcc/testsuite/gcc.target/arm/pac-10.c | 4 +++-
>  gcc/testsuite/gcc.target/arm/pac-11.c | 4 +++-
>  gcc/testsuite/gcc.target/arm/pac-12.c | 2 +-
>  gcc/testsuite/gcc.target/arm/pac-13.c | 2 +-
>  gcc/testsuite/gcc.target/arm/pac-14.c | 2 +-
>  gcc/testsuite/gcc.target/arm/pac-15.c | 4 ++--
>  gcc/testsuite/gcc.target/arm/pac-2.c  | 4 +++-
>  gcc/testsuite/gcc.target/arm/pac-3.c  | 4 +++-
>  gcc/testsuite/gcc.target/arm/pac-4.c  | 4 +++-
>  gcc/testsuite/gcc.target/arm/pac-5.c  | 4 +++-
>  gcc/testsuite/gcc.target/arm/pac-6.c  | 4 +++-
>  gcc/testsuite/gcc.target/arm/pac-7.c  | 4 +++-
>  gcc/testsuite/gcc.target/arm/pac-8.c  | 4 +++-
>  gcc/testsuite/gcc.target/arm/pac-9.c  | 4 +++-
>  gcc/testsuite/lib/target-supports.exp | 2 +-
>  16 files changed, 39 insertions(+), 17 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/pac-1.c 
> b/gcc/testsuite/gcc.target/arm/pac-1.c
> index e0eea0858e0..a2bec355214 100644
> --- a/gcc/testsuite/gcc.target/arm/pac-1.c
> +++ b/gcc/testsuite/gcc.target/arm/pac-1.c
> @@ -1,8 +1,10 @@
>  /* Testing return address signing.  */
>  /* { dg-do run } */
> +/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_link } */
>  /* { dg-require-effective-target mbranch_protection_ok } */
>  /* { dg-require-effective-target arm_pacbti_hw } */
> -/* { dg-options "-march=armv8.1-m.main+pacbti+fp 
> -mbranch-protection=pac-ret+leaf -mthumb -mfloat-abi=hard --save-temps -O0" } 
> */
> +/* { dg-options "-mbranch-protection=pac-ret+leaf -mfloat-abi=hard 
> --save-temps -O0" } */
> +/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
>  
>  #include "pac.h"
>  
> diff --git a/gcc/testsuite/gcc.target/arm/pac-10.c 
> b/gcc/testsuite/gcc.target/arm/pac-10.c
> index 6da8434aeaf..0882dad7406 100644
> --- a/gcc/testsuite/gcc.target/arm/pac-10.c
> +++ b/gcc/testsuite/gcc.target/arm/pac-10.c
> @@ -1,7 +1,9 @@
>  /* Testing return address signing.  */
>  /* { dg-do compile } */
> +/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_ok } */
>  /* { dg-require-effective-target mbranch_protection_ok } */
> -/* { dg-options "-march=armv8.1-m.main+pacbti+fp -mbranch-protection=pac-ret 
> -mthumb -mfloat-abi=hard --save-temps -O0" } */
> +/* { dg-options "-mbranch-protection=pac-ret -mfloat-abi=hard --save-temps 
> -O0" } */
> +/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
>  
>  #include "pac.h"
>  
> diff --git a/gcc/testsuite/gcc.target/arm/pac-11.c 
> b/gcc/testsuite/gcc.target/arm/pac-11.c
> index 0bb727c2c80..32685a726b7 100644
> --- a/gcc/testsuite/gcc.target/arm/pac-11.c
> +++ b/gcc/testsuite/gcc.target/arm/pac-11.c
> @@ -1,7 +1,9 @@
>  /* Testing return address signing.  */
>  /* { dg-do compile } */
> +/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_ok } */
>  /* { dg-require-effective-target mbranch_protection_ok } */
> -/* { dg-options "-march=armv8.1-m.main+pacbti+fp 
> -mbranch-protection=bti+pac-ret+leaf -mthumb -mfloat-abi=hard --save-temps 
> -O2" } */
> +/* { dg-options "-mbranch-protection=bti+pac-ret+leaf -mfloat-abi=hard 
> --save-temps -O2" } */
> +/* { dg-add-options arm_arch_v8_1m_main_pacbti } */
>  
>  #include "pac.h"
>  
> diff --git a/gcc/testsuite/gcc.target/arm/pac-12.c 
> b/gcc/testsuite/gcc.target/arm/pac-12.c
> index 6e1295c834d..37bf0047c2e 100644
> --- a/gcc/testsuite

Re: [PATCH] Use nonnull_if_nonzero attribute rather than nonnull on various builtins [PR117023]

2024-11-19 Thread Joseph Myers
On Thu, 14 Nov 2024, Jakub Jelinek wrote:

> The patch adjusts builtins (when we have them) corresponding to the APIs
> mentioned in the C2Y N3322 paper:
> 1) strndup and memset get one nonnull_if_nonzero attribute instead of
>nonnull
> 2) memcpy, memmove, strncpy, memcmp, strncmp get two nonnull_if_nonzero
>attributes instead of nonnull
> 3) strncat has nonnull without argument changed to nonnull (1) and
>gets one nonnull_if_nonzero for the src argument (maybe it needs
>to be clarified in C2Y, but I really think first argument to strncat
>and wcsncat shouldn't be NULL even for n == 0, because NULL doesn't
>point to NULL terminated string and one can't append anything to it;
>and various implementations in the wild including glibc will crash
>with NULL first argument (x86_64 avx+ doesn't though)
> 
> Such changes are done also to the _chk suffixed counterparts of the
> builtins.
> 
> Furthermore I've changed a couple of builtins for POSIX functions which
> aren't covered by ISO C, but I'd expect if/when POSIX incorporates C2Y
> it would do the same changes.  In particular
> 
> 4) strnlen gets one nonnull_if_nonzero instead of nonnull
> 5) mempcpy and stpncpy get two nonnull_if_nonzero instead of nonnull
>and lose returns_nonnull attribute; this is kind of unfortunate
>but I think in the spirit of N3322 mempcpy (NULL, src, 0) should
>return NULL (i.e. dest + n aka NULL + 0, now valid) and it is hard to
>express returns non-NULL if first argument is non-NULL or third argument
>is non-zero

This seems right.

> I'm not really sure about fread/fwrite, N3322 doesn't mention those,
> can the first argument be NULL if third argument is 0?  What about
> if second argument is 0?  Can the fourth argument be NULL in such cases?

They definitely aren't covered by N3322 given that it's only amending 
certain subclauses of clause 7 (narrow and wide string functions and 
sorting and searching functions).  So the requirement for pointer 
arguments to be valid still applies.

There is the related question of whether sizes for fread and fwrite (and 
POSIX read and write, etc.) can be larger than the buffer size if it's 
known that the amount of data that can actually be read or written fits 
within the buffer.  See glibc bug 19165, where it turned out that the 
Linux kernel rejects too large buffer sizes for read with EFAULT 
regardless of the amount of data that might turn out to be available (so 
too-large sizes certainly can't work in practice at present).

This patch is OK.

-- 
Joseph S. Myers
josmy...@redhat.com



[PATCH] aarch64: Fix aarch64 after moving to C23

2024-11-19 Thread Andrew Pinski
This fixes a few aarch64 specific testcases after the move to default to GNU 
C23.
For the SME testcases, I decided to add a new one for the GNU C23 case as `()` 
changing
to mean `(void)` instead of a non-prototype declaration and add `-std=gnu17` to 
the old one.
For pic-*.c `-Wno-old-style-definition` was added not to warn about old style 
definitions.
For pr113573.c, I added `-std=gnu17` since I was not sure if `(...)` with C23 
would invoke
the same issue.

tested for aarch64-linux-gnu.

PR testsuite/117680
gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pic-constantpool1.c: Add -Wno-old-style-definition.
* gcc.target/aarch64/pic-symrefplus.c: Likewise.
* gcc.target/aarch64/pr113573.c: Add `-std=gnu17`
* gcc.target/aarch64/sme/streaming_mode_1.c: Likewise.
* gcc.target/aarch64/sme/za_state_1.c: Likewise.
* gcc.target/aarch64/sme/za_state_2.c: Likewise.
* gcc.target/aarch64/sme/streaming_mode_5.c: New test.
* gcc.target/aarch64/sme/za_state_7.c: New test.
* gcc.target/aarch64/sme/za_state_8.c: New test.

Signed-off-by: Andrew Pinski 
---
 .../gcc.target/aarch64/pic-constantpool1.c|   2 +-
 .../gcc.target/aarch64/pic-symrefplus.c   |   2 +-
 gcc/testsuite/gcc.target/aarch64/pr113573.c   |   2 +-
 .../gcc.target/aarch64/sme/streaming_mode_1.c |   2 +-
 .../gcc.target/aarch64/sme/streaming_mode_5.c | 133 +++
 .../gcc.target/aarch64/sme/za_state_1.c   |   2 +-
 .../gcc.target/aarch64/sme/za_state_2.c   |   2 +-
 .../gcc.target/aarch64/sme/za_state_7.c   | 160 ++
 .../gcc.target/aarch64/sme/za_state_8.c   |  77 +
 9 files changed, 376 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_7.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_8.c

diff --git a/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c 
b/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c
index 755c0b67ea4..1a5da9aacfa 100644
--- a/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c
+++ b/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -mcmodel=small -fPIC" }  */
+/* { dg-options "-Wno-old-style-definition -O2 -mcmodel=small -fPIC" }  */
 /* { dg-do compile } */
 /* { dg-require-effective-target fpic } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c 
b/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c
index 0c5e7fe7fb4..ca019ce3b33 100644
--- a/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c
+++ b/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -mcmodel=small -fPIC -fno-builtin" }  */
+/* { dg-options "-Wno-old-style-definition -O2 -mcmodel=small -fPIC 
-fno-builtin" }  */
 /* { dg-do compile } */
 /* { dg-require-effective-target fpic } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113573.c 
b/gcc/testsuite/gcc.target/aarch64/pr113573.c
index fc8607f7218..30175c4cb5c 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr113573.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr113573.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -std=gnu17" } */
 
 #pragma GCC aarch64 "arm_neon.h"
 typedef __Uint8x8_t uint8x8_t;
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c 
b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c
index 8874b05b882..40a79c5e600 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c
@@ -1,4 +1,4 @@
-// { dg-options "" }
+// { dg-options "-std=gnu17" }
 
 void sc_a () [[arm::streaming_compatible]];
 void sc_a (); // { dg-error "conflicting types" }
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_5.c 
b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_5.c
new file mode 100644
index 000..dbc332b7d2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_5.c
@@ -0,0 +1,133 @@
+// { dg-options "" }
+
+// This is the GNU C23+ version of streaming_mode_1.c
+// () means (void) rather than not a prototype declaration
+
+void sc_a () [[arm::streaming_compatible]];
+void sc_a (); // { dg-error "conflicting types" }
+
+void sc_b ();
+void sc_b () [[arm::streaming_compatible]]; // { dg-error "conflicting types" }
+
+void sc_c () [[arm::streaming_compatible]];
+void sc_c () {} // { dg-error "conflicting types" }
+
+void sc_d ();
+void sc_d () [[arm::streaming_compatible]] {} // { dg-error "conflicting 
types" }
+
+void sc_e () [[arm::streaming_compatible]] {}
+void sc_e (); // { dg-error "conflicting types" }
+
+void sc_f () {}
+void sc_f () [[arm::streaming_compatible]]; // { dg-error "conflicting types" }
+
+extern void (*sc_g) ();
+extern void (*sc_g) () [[arm::streaming_compatible]]; // { dg-error 
"conflicting types" }
+
+extern void (*sc_h) () [[arm::streaming_compatible]];
+extern void (*sc_h) 

  1   2   >