[gcc r15-6830] More memory leak fixes

2025-01-12 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:bedf26c201894d340a4eb61927879fd0d82e3102

commit r15-6830-gbedf26c201894d340a4eb61927879fd0d82e3102
Author: Richard Biener 
Date:   Fri Jan 10 16:25:35 2025 +0100

More memory leak fixes

The following were found compiling SPEC CPU 2017 with valgrind.

* tree-vect-slp.cc (vect_analyze_slp): Release saved_stmts
vector.
(vect_build_slp_tree_2): Release new_oprnds_info when not
used.
(vect_analyze_slp): Release root_stmts when gcond SLP
build fails.

Diff:
---
 gcc/tree-vect-slp.cc | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 337506419d92..02e7f5c4d587 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2678,6 +2678,8 @@ out:
  nops = 1;
  has_two_operators_perm = true;
}
+  else
+   vect_free_oprnd_info (new_oprnds_info);
 }
 
   auto_vec<slp_tree, 4> children;
@@ -4951,8 +4953,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
   max_tree_size, &limit,
   bst_map, NULL, force_single_lane);
}
- saved_stmts.release ();
}
+ saved_stmts.release ();
}
 
   /* Make sure to vectorize only-live stmts, usually inductions.  */
@@ -5013,10 +5015,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned 
max_tree_size,
  stmts.create (1);
  stmts.quick_push (vect_stmt_to_vectorize (varg));
 
- vect_build_slp_instance (vinfo, slp_inst_kind_gcond,
-  stmts, roots, remain,
-  max_tree_size, &limit,
-  bst_map, NULL, force_single_lane);
+ if (! vect_build_slp_instance (vinfo, slp_inst_kind_gcond,
+stmts, roots, remain,
+max_tree_size, &limit,
+bst_map, NULL, force_single_lane))
+   roots.release ();
}
 
/* Find and create slp instances for inductions that have been forced


[gcc r15-6831] Fix union member access for EXEC_INQUIRE.

2025-01-12 Thread Thomas König via Gcc-cvs
https://gcc.gnu.org/g:40754a3b9bef83bf4da0675fcb378e8cd1675602

commit r15-6831-g40754a3b9bef83bf4da0675fcb378e8cd1675602
Author: Thomas Koenig 
Date:   Sun Jan 12 13:05:25 2025 +0100

Fix union member access for EXEC_INQUIRE.

gcc/fortran/ChangeLog:

PR fortran/118432
* frontend-passes.cc (doloop_code): Select correct member
of co->ext.union for inquire.

Diff:
---
 gcc/fortran/frontend-passes.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/fortran/frontend-passes.cc b/gcc/fortran/frontend-passes.cc
index 3a3328d44508..6ee6ce4c3ff1 100644
--- a/gcc/fortran/frontend-passes.cc
+++ b/gcc/fortran/frontend-passes.cc
@@ -2552,7 +2552,7 @@ doloop_code (gfc_code **c, int *walk_subtrees 
ATTRIBUTE_UNUSED,
   break;
 
 case EXEC_INQUIRE:
-  if (co->ext.filepos->err)
+  if (co->ext.inquire->err)
seen_goto = true;
   break;


[gcc r15-6833] Alpha: Restore frame pointer last in `builtin_longjmp' [PR64242]

2025-01-12 Thread Maciej W. Rozycki via Gcc-cvs
https://gcc.gnu.org/g:3cf0e6ab2aa9e7cb9a406079ff19856a6461d9f0

commit r15-6833-g3cf0e6ab2aa9e7cb9a406079ff19856a6461d9f0
Author: Maciej W. Rozycki 
Date:   Sun Jan 12 16:48:53 2025 +0000

Alpha: Restore frame pointer last in `builtin_longjmp' [PR64242]

Add similar arrangements to `builtin_longjmp' for Alpha as with commit
71b144289c1c ("re PR middle-end/64242 (Longjmp expansion incorrect)")
and commit 511ed59d0b04 ("Fix PR64242 - Longjmp expansion incorrect"),
so as to restore the frame pointer last, so that accesses to a local
buffer supplied can still be fulfilled with memory accesses via the
original frame pointer, fixing:

FAIL: gcc.c-torture/execute/pr64242.c   -O0  execution test
FAIL: gcc.c-torture/execute/pr64242.c   -O1  execution test
FAIL: gcc.c-torture/execute/pr64242.c   -O2  execution test
FAIL: gcc.c-torture/execute/pr64242.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/pr64242.c   -Os  execution test
FAIL: gcc.c-torture/execute/pr64242.c   -O2 -flto -fno-use-linker-plugin 
-flto-partition=none  execution test
FAIL: gcc.c-torture/execute/pr64242.c   -O2 -flto -fuse-linker-plugin 
-fno-fat-lto-objects  execution test

and adding no regressions in `alpha-linux-gnu' testing.

gcc/
PR middle-end/64242
* config/alpha/alpha.md (`builtin_longjmp'): Restore frame
pointer last.  Add frame clobber and schedule blockage.

Diff:
---
 gcc/config/alpha/alpha.md | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 35c8030422f5..178ce992206d 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -5005,14 +5005,28 @@
   rtx pv = gen_rtx_REG (Pmode, 27);
 
   /* This bit is the same as expand_builtin_longjmp.  */
+
   emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
   emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
-  emit_move_insn (hard_frame_pointer_rtx, fp);
+
   emit_move_insn (pv, lab);
+
+  /* Restore the frame pointer and stack pointer.  We must use a
+ temporary since the setjmp buffer may be a local.  */
+  fp = copy_to_reg (fp);
   emit_stack_restore (SAVE_NONLOCAL, stack);
+
+  /* Ensure the frame pointer move is not optimized.  */
+  emit_insn (gen_blockage ());
+  emit_clobber (hard_frame_pointer_rtx);
+  emit_clobber (frame_pointer_rtx);
+  emit_move_insn (hard_frame_pointer_rtx, fp);
+
   emit_use (hard_frame_pointer_rtx);
   emit_use (stack_pointer_rtx);
 
+  /* End of the bit corresponding to expand_builtin_longjmp.  */
+
   /* Load the label we are jumping through into $27 so that we know
  where to look for it when we get back to setjmp's function for
  restoring the gp.  */


[gcc r15-6834] Alpha: Always respect -mbwx, -mcix, -mfix, -mmax, and their inverse

2025-01-12 Thread Maciej W. Rozycki via Gcc-cvs
https://gcc.gnu.org/g:19fdb9f3792d4c3c9ff3d18dc4566bb16e62de60

commit r15-6834-g19fdb9f3792d4c3c9ff3d18dc4566bb16e62de60
Author: Maciej W. Rozycki 
Date:   Sun Jan 12 16:48:53 2025 +0000

Alpha: Always respect -mbwx, -mcix, -mfix, -mmax, and their inverse

Contrary to user documentation the `-mbwx', `-mcix', `-mfix', `-mmax'
feature options and their inverse forms are ignored whenever `-mcpu='
option is in effect, either by having been given explicitly or where
configured as the default such as with the `alphaev56-linux-gnu' target.
In the latter case there is no way to change the settings these options
are supposed to tweak other than with `-mcpu=' and the settings cannot
be individually controlled, making all the feature options permanently
inactive.

It seems a regression from commit 7816bea0e23b ("config.gcc: Reorganize
--with-cpu logic.") back in 2003, which replaced the setting of the
default feature mask with the setting of the default CPU across a few
targets, and the complementing logic in the Alpha backend wasn't updated
accordingly.

Fix this by making the individual feature options take precedence over
`-mcpu='.  Add test cases to verify this is the case, and to cover the
defaults as well for the boundary cases.

This has a drawback where the order of the options is ignored between
`-mcpu=' and these individual options, so e.g. `-mno-bwx -mcpu=ev6' will
keep the BWX feature disabled even though `-mcpu=ev6' comes later in the
command line.  This may affect some scenarios involving user overrides
such as with CFLAGS passed to `configure' and `make' invocations.  I do
believe it has been our practice anyway for more finegrained options to
override group options regardless of their relative order on the command
line and in any case using `-mcpu=ev6 -mbwx' as the override will do the
right thing if required, canceling any previous `-mno-bwx'.

This has been spotted with `alphaev56-linux-gnu' target verification and
a recently added test case:

FAIL: gcc.target/alpha/stwx0.c   -O1   scan-assembler-times \\sldq_u\\s 2
FAIL: gcc.target/alpha/stwx0.c   -O1   scan-assembler-times \\smskwh\\s 1
FAIL: gcc.target/alpha/stwx0.c   -O1   scan-assembler-times \\smskwl\\s 1
FAIL: gcc.target/alpha/stwx0.c   -O1   scan-assembler-times \\sstq_u\\s 2

(and similarly for the remaining optimization levels covered) which this
fix has addressed.

gcc/
* config/alpha/alpha.cc (alpha_option_override): Ignore CPU
flags corresponding to features the enabling or disabling of
which has been requested with an individual feature option.

gcc/testsuite/
* gcc.target/alpha/target-bwx-1.c: New file.
* gcc.target/alpha/target-bwx-2.c: New file.
* gcc.target/alpha/target-bwx-3.c: New file.
* gcc.target/alpha/target-bwx-4.c: New file.
* gcc.target/alpha/target-cix-1.c: New file.
* gcc.target/alpha/target-cix-2.c: New file.
* gcc.target/alpha/target-cix-3.c: New file.
* gcc.target/alpha/target-cix-4.c: New file.
* gcc.target/alpha/target-fix-1.c: New file.
* gcc.target/alpha/target-fix-2.c: New file.
* gcc.target/alpha/target-fix-3.c: New file.
* gcc.target/alpha/target-fix-4.c: New file.
* gcc.target/alpha/target-max-1.c: New file.
* gcc.target/alpha/target-max-2.c: New file.
* gcc.target/alpha/target-max-3.c: New file.
* gcc.target/alpha/target-max-4.c: New file.

Diff:
---
 gcc/config/alpha/alpha.cc | 5 +++--
 gcc/testsuite/gcc.target/alpha/target-bwx-1.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-bwx-2.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-bwx-3.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-bwx-4.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-cix-1.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-cix-2.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-cix-3.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-cix-4.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-fix-1.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-fix-2.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-fix-3.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-fix-4.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-max-1.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-max-2.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-max-3.c | 6 ++++++
 gcc/testsuite/gcc.target/alpha/target-max-4.c | 6 ++++++
 17 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index 030dc7728859..958a785ffd0e 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -460,8 +460,9 @@ alpha_option_override (void)

[gcc r15-6835] Alpha: Optimize block moves coming from longword-aligned source

2025-01-12 Thread Maciej W. Rozycki via Gcc-cvs
https://gcc.gnu.org/g:4e557210b7f9fd669ff66c6958327eb2d4262d80

commit r15-6835-g4e557210b7f9fd669ff66c6958327eb2d4262d80
Author: Maciej W. Rozycki 
Date:   Sun Jan 12 16:48:53 2025 +0000

Alpha: Optimize block moves coming from longword-aligned source

Now that we have proper alignment determination for block moves in place
the case of copying a block of longword-aligned data has become real, so
implement the merging of loaded data from pairs of SImode registers into
single DImode registers for the purpose of using with unaligned stores
efficiently, as suggested by a comment in `alpha_expand_block_move' and
discard the comment.  Provide test cases accordingly.

gcc/
* config/alpha/alpha.cc (alpha_expand_block_move): Merge loaded
data from pairs of SImode registers into single DImode registers
if to be used with unaligned stores.

gcc/testsuite/
* gcc.target/alpha/memcpy-si-aligned.c: New file.
* gcc.target/alpha/memcpy-si-unaligned.c: New file.
* gcc.target/alpha/memcpy-si-unaligned-dst.c: New file.
* gcc.target/alpha/memcpy-si-unaligned-src.c: New file.
* gcc.target/alpha/memcpy-si-unaligned-src-bwx.c: New file.

Diff:
---
 gcc/config/alpha/alpha.cc  | 45 +++
 gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c | 16 +++
 .../gcc.target/alpha/memcpy-si-unaligned-dst.c | 16 +++
 .../gcc.target/alpha/memcpy-si-unaligned-src-bwx.c | 11 +
 .../gcc.target/alpha/memcpy-si-unaligned-src.c | 15 +++
 .../gcc.target/alpha/memcpy-si-unaligned.c | 51 ++
 6 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index 958a785ffd0e..8ec9e8c5d399 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -3931,14 +3931,44 @@ alpha_expand_block_move (rtx operands[])
 {
   words = bytes / 4;
 
-  for (i = 0; i < words; ++i)
-   data_regs[nregs + i] = gen_reg_rtx (SImode);
+  /* Load an even quantity of SImode data pieces only.  */
+  unsigned int hwords = words / 2;
+  for (i = 0; i / 2 < hwords; ++i)
+   {
+ data_regs[nregs + i] = gen_reg_rtx (SImode);
+ emit_move_insn (data_regs[nregs + i],
+ adjust_address (orig_src, SImode, ofs + i * 4));
+   }
 
-  for (i = 0; i < words; ++i)
-   emit_move_insn (data_regs[nregs + i],
-   adjust_address (orig_src, SImode, ofs + i * 4));
+  /* If we'll be using unaligned stores, merge data from pairs
+of SImode registers into DImode registers so that we can
+store it more efficiently via quadword unaligned stores.  */
+  unsigned int j;
+  if (dst_align < 32)
+   for (i = 0, j = 0; i < words / 2; ++i, j = i * 2)
+ {
+   rtx hi = expand_simple_binop (DImode, ASHIFT,
+ data_regs[nregs + j + 1],
+ GEN_INT (32), NULL_RTX,
+ 1, OPTAB_WIDEN);
+   data_regs[nregs + i] = expand_simple_binop (DImode, IOR, hi,
+   data_regs[nregs + j],
+   NULL_RTX,
+   1, OPTAB_WIDEN);
+ }
+  else
+   j = i;
 
-  nregs += words;
+  /* Take care of any remaining odd trailing SImode data piece.  */
+  if (j < words)
+   {
+ data_regs[nregs + i] = gen_reg_rtx (SImode);
+ emit_move_insn (data_regs[nregs + i],
+ adjust_address (orig_src, SImode, ofs + j * 4));
+ ++i;
+   }
+
+  nregs += i;
   bytes -= words * 4;
   ofs += words * 4;
 }
@@ -4057,13 +4087,12 @@ alpha_expand_block_move (rtx operands[])
 }
 
   /* Due to the above, this won't be aligned.  */
-  /* ??? If we have more than one of these, consider constructing full
- words in registers and using alpha_expand_unaligned_store_words.  */
   while (i < nregs && GET_MODE (data_regs[i]) == SImode)
 {
   alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
   ofs += 4;
   i++;
+  gcc_assert (i == nregs || GET_MODE (data_regs[i]) != SImode);
 }
 
   if (dst_align >= 16)
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c 
b/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c
new file mode 100644
index 000000000000..2572a3187e9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned int aligned_src_si[17] = { [0 ... 16] = 0xeaebeced };
+unsigned int aligned_dst_si[17] = { [0 ... 16] = 0xdcdbdad9 };
+
+void
+memcpy_aligned_data_si

[gcc r15-6836] Alpha: Fix a block move pessimisation with zero-extension after LDWU

2025-01-12 Thread Maciej W. Rozycki via Gcc-cvs
https://gcc.gnu.org/g:ed8cd42d138fa048e0c0eff1ea28b39f5abe1c29

commit r15-6836-ged8cd42d138fa048e0c0eff1ea28b39f5abe1c29
Author: Maciej W. Rozycki 
Date:   Sun Jan 12 16:48:54 2025 +0000

Alpha: Fix a block move pessimisation with zero-extension after LDWU

For the BWX case we have a pessimisation in `alpha_expand_block_move'
for HImode loads where we place the data loaded into a HImode register
as well, therefore losing information that indeed the data loaded has
already been zero-extended to the full DImode width of the register.
Later on when we store this data in QImode quantities into an unaligned
destination, we zero-extend it again for the purpose of right-shifting,
such as with the test case included producing code at `-O2' as follows:

ldah $2,unaligned_src_hi($29)   !gprelhigh
lda $1,unaligned_src_hi($2) !gprellow
ldwu $6,unaligned_src_hi($2)!gprellow
ldwu $5,2($1)
ldwu $4,4($1)
bis $31,$31,$31
zapnot $6,3,$3  # Redundant!
ldbu $7,6($1)
zapnot $5,3,$2  # Redundant!
stb $6,0($16)
zapnot $4,3,$1  # Redundant!
stb $5,2($16)
srl $3,8,$3
stb $4,4($16)
srl $2,8,$2
stb $3,1($16)
srl $1,8,$1
stb $2,3($16)
stb $1,5($16)
stb $7,6($16)

The non-BWX case is unaffected, because there we use byte insertion, so
we don't care that data is held in a HImode register.

Address this by making the holding RTX a HImode subreg of the original
DImode register, which the RTL passes can then see through and eliminate
the zero-extension where otherwise required, resulting in this shortened
code:

ldah $2,unaligned_src_hi($29)   !gprelhigh
lda $1,unaligned_src_hi($2) !gprellow
ldwu $4,unaligned_src_hi($2)!gprellow
ldwu $3,2($1)
ldwu $2,4($1)
bis $31,$31,$31
srl $4,8,$6
ldbu $1,6($1)
srl $3,8,$5
stb $4,0($16)
stb $6,1($16)
srl $2,8,$4
stb $3,2($16)
stb $5,3($16)
stb $2,4($16)
stb $4,5($16)
stb $1,6($16)

While at it reformat the enclosing do-while statement according to the
GNU Coding Standards, observing that in this case it does not obfuscate
the change owing to the odd original indentation.

gcc/
* config/alpha/alpha.cc (alpha_expand_block_move): Use a HImode
subreg of a DImode register to hold data from an aligned HImode
load.

Diff:
---
 gcc/config/alpha/alpha.cc   | 17 +++++++++++------
 .../gcc.target/alpha/memcpy-hi-unaligned-dst.c  | 16 ++++++++++++++++
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index 8ec9e8c5d399..6965ece16d0b 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -3999,14 +3999,19 @@ alpha_expand_block_move (rtx operands[])
   if (bytes >= 2)
 {
   if (src_align >= 16)
-   {
- do {
-   data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
-   emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
+   do
+ {
+   tmp = gen_reg_rtx (DImode);
+   emit_move_insn (tmp,
+   expand_simple_unop (DImode, SET,
+   adjust_address (orig_src,
+   HImode, ofs),
+   NULL_RTX, 1));
+   data_regs[nregs++] = gen_rtx_SUBREG (HImode, tmp, 0);
bytes -= 2;
ofs += 2;
- } while (bytes >= 2);
-   }
+ }
+   while (bytes >= 2);
   else if (! TARGET_BWX)
{
  data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c 
b/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c
new file mode 100644
index 000000000000..4e3c02f5b906
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mbwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+unsigned short unaligned_src_hi[4];
+
+void
+memcpy_unaligned_dst_hi (void *dst)
+{
+  __builtin_memcpy (dst, unaligned_src_hi, 7);
+}
+
+/* { dg-final { scan-assembler-times "\\sldwu\\s" 3 } } */
+/* { dg-final { scan-assembler-times "\\sldbu\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstb\\s" 7 } } */
+/* { dg-final { scan-assembler-not "\\szapnot\\s" } } */


[gcc r15-6832] Alpha: Add memory clobbers to `builtin_longjmp' expansion

2025-01-12 Thread Maciej W. Rozycki via Gcc-cvs
https://gcc.gnu.org/g:46861167f548ec622918d95acd2424b64f56797d

commit r15-6832-g46861167f548ec622918d95acd2424b64f56797d
Author: Maciej W. Rozycki 
Date:   Sun Jan 12 16:48:53 2025 +0000

Alpha: Add memory clobbers to `builtin_longjmp' expansion

Add the same memory clobbers to `builtin_longjmp' for Alpha as with
commit 41439bf6a647 ("builtins.c (expand_builtin_longjmp): Added two
memory clobbers."), to prevent instructions that access memory via the
frame or stack pointer from being moved across the write to the frame
pointer.

gcc/
* config/alpha/alpha.md (builtin_longjmp): Add memory clobbers.

Diff:
---
 gcc/config/alpha/alpha.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 376c4cba90c5..35c8030422f5 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -5005,6 +5005,8 @@
   rtx pv = gen_rtx_REG (Pmode, 27);
 
   /* This bit is the same as expand_builtin_longjmp.  */
+  emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
+  emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
   emit_move_insn (hard_frame_pointer_rtx, fp);
   emit_move_insn (pv, lab);
   emit_stack_restore (SAVE_NONLOCAL, stack);


[gcc r15-6837] Fortran: implement F2018 intrinsic OUT_OF_RANGE [PR115788]

2025-01-12 Thread Harald Anlauf via Gcc-cvs
https://gcc.gnu.org/g:f8eda60e12dabaf5e9501104781ef5eba334cff7

commit r15-6837-gf8eda60e12dabaf5e9501104781ef5eba334cff7
Author: Harald Anlauf 
Date:   Sun Jan 12 19:26:35 2025 +0100

Fortran: implement F2018 intrinsic OUT_OF_RANGE [PR115788]

Implementation of the Fortran 2018 standard intrinsic OUT_OF_RANGE, with
the GNU Fortran extension to unsigned integers.

Runtime code is fully inline expanded.

PR fortran/115788

gcc/fortran/ChangeLog:

* check.cc (gfc_check_out_of_range): Check arguments to intrinsic.
* expr.cc (free_expr0): Fix a memleak with unsigned literals.
* gfortran.h (enum gfc_isym_id): Define GFC_ISYM_OUT_OF_RANGE.
* gfortran.texi: Add OUT_OF_RANGE to list of intrinsics supporting
UNSIGNED.
* intrinsic.cc (add_functions): Add Fortran prototype.  Break some
nearby lines with excessive length.
* intrinsic.h (gfc_check_out_of_range): Add prototypes.
* intrinsic.texi: Fortran documentation of OUT_OF_RANGE.
* simplify.cc (gfc_simplify_out_of_range): Compile-time 
simplification
of OUT_OF_RANGE.
* trans-intrinsic.cc (gfc_conv_intrinsic_out_of_range): Generate
inline expansion of runtime code for OUT_OF_RANGE.
(gfc_conv_intrinsic_function): Use it.

gcc/testsuite/ChangeLog:

* gfortran.dg/ieee/out_of_range.f90: New test.
* gfortran.dg/out_of_range_1.f90: New test.
* gfortran.dg/out_of_range_2.f90: New test.
* gfortran.dg/out_of_range_3.f90: New test.

Diff:
---
 gcc/fortran/check.cc|  42 +
 gcc/fortran/expr.cc |   1 +
 gcc/fortran/gfortran.h  |   1 +
 gcc/fortran/gfortran.texi   |   7 +-
 gcc/fortran/intrinsic.cc|  28 +++-
 gcc/fortran/intrinsic.h |   2 +
 gcc/fortran/intrinsic.texi  |  67 
 gcc/fortran/simplify.cc | 208 
 gcc/fortran/trans-intrinsic.cc  | 196 ++
 gcc/testsuite/gfortran.dg/ieee/out_of_range.f90 |  65 
 gcc/testsuite/gfortran.dg/out_of_range_1.f90|  91 +++
 gcc/testsuite/gfortran.dg/out_of_range_2.f90| 115 +
 gcc/testsuite/gfortran.dg/out_of_range_3.f90|  25 +++
 13 files changed, 835 insertions(+), 13 deletions(-)

diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc
index e29ad3986110..35458643835c 100644
--- a/gcc/fortran/check.cc
+++ b/gcc/fortran/check.cc
@@ -4864,6 +4864,48 @@ gfc_check_null (gfc_expr *mold)
 }
 
 
+bool
+gfc_check_out_of_range (gfc_expr *x, gfc_expr *mold, gfc_expr *round)
+{
+  if (!int_or_real_or_unsigned_check (x, 0))
+return false;
+
+  if (mold == NULL)
+return false;
+
+  if (!int_or_real_or_unsigned_check (mold, 1))
+return false;
+
+  if (!scalar_check (mold, 1))
+return false;
+
+  if (round)
+{
+  if (!type_check (round, 2, BT_LOGICAL))
+   return false;
+
+  if (!scalar_check (round, 2))
+   return false;
+
+  if (x->ts.type != BT_REAL
+ || (mold->ts.type != BT_INTEGER && mold->ts.type != BT_UNSIGNED))
+   {
+ gfc_error ("%qs argument of %qs intrinsic at %L shall appear "
+"only if %qs is of type REAL and %qs is of type "
+"INTEGER or UNSIGNED",
+gfc_current_intrinsic_arg[2]->name,
+gfc_current_intrinsic, &round->where,
+gfc_current_intrinsic_arg[0]->name,
+gfc_current_intrinsic_arg[1]->name);
+
+ return false;
+   }
+}
+
+  return true;
+}
+
+
 bool
 gfc_check_pack (gfc_expr *array, gfc_expr *mask, gfc_expr *vector)
 {
diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc
index 0e40b2493a5c..7f3f6c52fb54 100644
--- a/gcc/fortran/expr.cc
+++ b/gcc/fortran/expr.cc
@@ -466,6 +466,7 @@ free_expr0 (gfc_expr *e)
   switch (e->ts.type)
{
case BT_INTEGER:
+   case BT_UNSIGNED:
  mpz_clear (e->value.integer);
  break;
 
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 6293d85778c0..70913e3312b2 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -626,6 +626,7 @@ enum gfc_isym_id
   GFC_ISYM_NULL,
   GFC_ISYM_NUM_IMAGES,
   GFC_ISYM_OR,
+  GFC_ISYM_OUT_OF_RANGE,
   GFC_ISYM_PACK,
   GFC_ISYM_PARITY,
   GFC_ISYM_PERROR,
diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi
index 116667245932..d3fe0935aa44 100644
--- a/gcc/fortran/gfortran.texi
+++ b/gcc/fortran/gfortran.texi
@@ -2830,6 +2830,7 @@ The following intrinsics take unsigned arguments:
 @item @code{MODULO}, @pxref{MODULO}
 @item @code{MVBITS}, @pxref{MVBITS}
 @item @code{NOT}, @pxref{NOT}
+@item @code{OUT_OF_RANGE}, @pxref{OUT_OF_RANGE}
 @

[gcc r15-6838] c: UX improvements to 'too {few, many} arguments' errors (v5) [PR118112]

2025-01-12 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:a236f70617213343f3075ee43e8d9f5882dca400

commit r15-6838-ga236f70617213343f3075ee43e8d9f5882dca400
Author: David Malcolm 
Date:   Sun Jan 12 13:46:31 2025 -0500

c: UX improvements to 'too {few,many} arguments' errors (v5) [PR118112]

Consider this case of a bad call to a callback function (perhaps
due to C23 changing the meaning of () in function decls):

struct p {
int (*bar)();
};

void baz() {
struct p q;
q.bar(1);
}

Before this patch the C frontend emits:

t.c: In function 'baz':
t.c:7:5: error: too many arguments to function 'q.bar'
7 | q.bar(1);
  | ^

which doesn't give the user much help in terms of knowing what
was expected, and where the relevant declaration is.

With this patch the C frontend emits:

t.c: In function 'baz':
t.c:7:5: error: too many arguments to function 'q.bar'; expected 0, have 1
7 | q.bar(1);
  | ^ ~
t.c:2:15: note: declared here
2 | int (*bar)();
  |   ^~~

(showing the expected vs actual counts, the pertinent field decl, and
underlining the first extraneous argument at the callsite)

Similarly, the patch also updates the "too few arguments" case to also
show expected vs actual counts.  Doing so requires a tweak to the
wording to say "at least" for the case of variadic fns where
previously the C FE emitted e.g.:

s.c: In function 'test':
s.c:5:3: error: too few arguments to function 'callee'
5 |   callee ();
  |   ^~
s.c:1:6: note: declared here
1 | void callee (const char *, ...);
  |  ^~

with this patch it emits:

s.c: In function 'test':
s.c:5:3: error: too few arguments to function 'callee'; expected at least 
1, have 0
5 |   callee ();
  |   ^~
s.c:1:6: note: declared here
1 | void callee (const char *, ...);
  |  ^~

gcc/c/ChangeLog:
PR c/118112
* c-typeck.cc (inform_declaration): Add "function_expr" param and
use it for cases where we couldn't show the function decl to show
field decls for callbacks.
(build_function_call_vec): Add missing auto_diagnostic_group.
Update for new param of inform_declaration.
(convert_arguments): Likewise.  For the "too many arguments" case
add the expected vs actual counts to the message, and if we have
it, add the location_t of the first surplus param as a secondary
location within the diagnostic.  For the "too few arguments" case,
determine the minimum number of arguments required and add the
expected vs actual counts to the message, tweaking it to "at least"
for variadic functions.

gcc/testsuite/ChangeLog:
PR c/118112
* gcc.dg/too-few-arguments.c: New test.
* gcc.dg/too-many-arguments.c: New test.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/c/c-typeck.cc | 77 
 gcc/testsuite/gcc.dg/too-few-arguments.c  | 28 +++
 gcc/testsuite/gcc.dg/too-many-arguments.c | 83 +++
 3 files changed, 177 insertions(+), 11 deletions(-)

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 6e40f7edf02a..cd9290160d7a 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -3737,14 +3737,30 @@ build_function_call (location_t loc, tree function, 
tree params)
   return ret;
 }
 
-/* Give a note about the location of the declaration of DECL.  */
+/* Give a note about the location of the declaration of DECL,
+   or, failing that, a pertinent declaration for FUNCTION_EXPR.  */
 
 static void
-inform_declaration (tree decl)
+inform_declaration (tree decl, tree function_expr)
 {
   if (decl && (TREE_CODE (decl) != FUNCTION_DECL
   || !DECL_IS_UNDECLARED_BUILTIN (decl)))
 inform (DECL_SOURCE_LOCATION (decl), "declared here");
+  else if (function_expr)
+switch (TREE_CODE (function_expr))
+  {
+  default:
+   break;
+  case COMPONENT_REF:
+   /* Show the decl of the pertinent field (e.g. for callback
+  fields in a struct.  */
+   {
+ tree field_decl = TREE_OPERAND (function_expr, 1);
+ if (location_t loc = DECL_SOURCE_LOCATION (field_decl))
+   inform (loc, "declared here");
+   }
+   break;
+  }
 }
 
 /* C implementation of callback for use when checking param types.  */
@@ -3819,10 +3835,11 @@ build_function_call_vec (location_t loc, 
vec<location_t> arg_loc,
  function);
   else if (DECL_P (function))
{
+ auto_diagnostic_group d;
  error_at (loc,
"called object %qD is not a function or function pointer",
funct

[gcc r15-6845] lto: Fix empty fcntl.h build error with MinGW.

2025-01-12 Thread Michal Jires via Gcc-cvs
https://gcc.gnu.org/g:89ebb88d1d73ea8f693f2195321b402c31186abe

commit r15-6845-g89ebb88d1d73ea8f693f2195321b402c31186abe
Author: Michal Jires 
Date:   Mon Jan 13 01:58:41 2025 +0100

lto: Fix empty fcntl.h build error with MinGW.

MSYS2+MinGW contains headers without defining expected contents.
This fix checks that the fcntl function is actually defined.

Bootstrapped/regtested on x86_64-linux. Committed as obvious.

gcc/ChangeLog:

* lockfile.cc (LOCKFILE_USE_FCNTL): New.
(lockfile::lock_write): Use LOCKFILE_USE_FCNTL.
(lockfile::try_lock_write): Use LOCKFILE_USE_FCNTL.
(lockfile::lock_read): Use LOCKFILE_USE_FCNTL.
(lockfile::unlock): Use LOCKFILE_USE_FCNTL.
(lockfile::lockfile_supported): Use LOCKFILE_USE_FCNTL.

Diff:
---
 gcc/lockfile.cc | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/lockfile.cc b/gcc/lockfile.cc
index b385c295851f..cecbb86491da 100644
--- a/gcc/lockfile.cc
+++ b/gcc/lockfile.cc
@@ -22,6 +22,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "lockfile.h"
 
+/* fcntl.h may exist without expected contents.  */
+#if HAVE_FCNTL_H && HOST_HAS_F_SETLKW
+#define LOCKFILE_USE_FCNTL 1
+#endif
 
 /* Unique write lock.  No other lock can be held on this lockfile.
Blocking call.  */
@@ -32,7 +36,7 @@ lockfile::lock_write ()
   if (fd < 0)
 return -1;
 
-#if HAVE_FCNTL_H
+#ifdef LOCKFILE_USE_FCNTL
   struct flock s_flock;
 
   s_flock.l_whence = SEEK_SET;
@@ -57,7 +61,7 @@ lockfile::try_lock_write ()
   if (fd < 0)
 return -1;
 
-#if HAVE_FCNTL_H
+#ifdef LOCKFILE_USE_FCNTL
   struct flock s_flock;
 
   s_flock.l_whence = SEEK_SET;
@@ -87,7 +91,7 @@ lockfile::lock_read ()
   if (fd < 0)
 return -1;
 
-#if HAVE_FCNTL_H
+#ifdef LOCKFILE_USE_FCNTL
   struct flock s_flock;
 
   s_flock.l_whence = SEEK_SET;
@@ -108,7 +112,7 @@ lockfile::unlock ()
 {
   if (fd < 0)
 {
-#if HAVE_FCNTL_H
+#ifdef LOCKFILE_USE_FCNTL
   struct flock s_flock;
 
   s_flock.l_whence = SEEK_SET;
@@ -128,7 +132,7 @@ lockfile::unlock ()
 bool
 lockfile::lockfile_supported ()
 {
-#if HAVE_FCNTL_H
+#ifdef LOCKFILE_USE_FCNTL
   return true;
 #else
   return false;


[gcc r15-6846] lto: Pass cache checksum by reference [PR118181]

2025-01-12 Thread Michal Jires via Gcc-cvs
https://gcc.gnu.org/g:9100be5741329dfe7bd49d6cf60be1771b9bb3ea

commit r15-6846-g9100be5741329dfe7bd49d6cf60be1771b9bb3ea
Author: Michal Jires 
Date:   Mon Jan 13 02:49:58 2025 +0100

lto: Pass cache checksum by reference [PR118181]

Bootstrapped/regtested on x86_64-linux. Committed as obvious.

PR lto/118181

gcc/ChangeLog:

* lto-ltrans-cache.cc (ltrans_file_cache::create_item):
Pass checksum by reference.
* lto-ltrans-cache.h: Likewise.

Diff:
---
 gcc/lto-ltrans-cache.cc | 2 +-
 gcc/lto-ltrans-cache.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/lto-ltrans-cache.cc b/gcc/lto-ltrans-cache.cc
index 22c0bffaed59..c57775fae851 100644
--- a/gcc/lto-ltrans-cache.cc
+++ b/gcc/lto-ltrans-cache.cc
@@ -309,7 +309,7 @@ ltrans_file_cache::save_cache ()
 
Must be called with creation_lock held to prevent data race.  */
 ltrans_file_cache::item*
-ltrans_file_cache::create_item (checksum_t checksum)
+ltrans_file_cache::create_item (const checksum_t& checksum)
 {
   size_t prefix_len = cache_prefix.size ();
 
diff --git a/gcc/lto-ltrans-cache.h b/gcc/lto-ltrans-cache.h
index b95f63c33357..5fef44bae538 100644
--- a/gcc/lto-ltrans-cache.h
+++ b/gcc/lto-ltrans-cache.h
@@ -108,7 +108,7 @@ private:
  New input/output files are chosen to not collide with other items.
 
  Must be called with creation_lock held to prevent data race.  */
-  item* create_item (checksum_t checksum);
+  item* create_item (const checksum_t& checksum);
 
   /* Prunes oldest unused cache items over limit.
  Must be called with deletion_lock held to prevent data race.  */


[gcc(refs/users/aoliva/heads/testme)] [ifcombine] propagate signbit mask to XOR right-hand operand

2025-01-12 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:7f21e67697013a0ae714c5198145c5ae029c942d

commit 7f21e67697013a0ae714c5198145c5ae029c942d
Author: Alexandre Oliva 
Date:   Sun Jan 12 22:16:21 2025 -0300

[ifcombine] propagate signbit mask to XOR right-hand operand

If a single-bit bitfield takes up the sign bit of a storage unit,
comparing the corresponding bitfield between two objects loads the
storage units, XORs them, converts the result to signed char, and
compares it with zero: ((signed char)(a. ^ c.) >= 0).

fold_truth_andor_for_ifcombine recognizes the compare with zero as a
sign bit test, then it decomposes the XOR into an equality test.

The problem is that, after this decomposition, that figures out the
width of the accessed fields, we apply the sign bit mask to the
left-hand operand of the compare, but we failed to also apply it to
the right-hand operand when both were taken from the same XOR.

This patch fixes that.


for  gcc/ChangeLog

PR tree-optimization/118409
* gimple-fold.cc (fold_truth_andor_for_ifcombine): Apply the
signbit mask to the right-hand XOR operand too.

for  gcc/testsuite/ChangeLog

PR tree-optimization/118409
* gcc.dg/field-merge-20.c: New.

Diff:
---
 gcc/gimple-fold.cc| 20 +++
 gcc/testsuite/gcc.dg/field-merge-20.c | 64 +++
 2 files changed, 84 insertions(+)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index a3987c4590ae..93ed8b3abb05 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8270,6 +8270,16 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
tree truth_type,
ll_and_mask = sign;
   else
ll_and_mask &= sign;
+  if (l_xor)
+   {
+ if (!lr_and_mask.get_precision ())
+   lr_and_mask = sign;
+ else
+   lr_and_mask &= sign;
+ if (l_const.get_precision ())
+   l_const &= wide_int::from (lr_and_mask,
+  l_const.get_precision (), UNSIGNED);
+   }
 }
 
   if (rsignbit)
@@ -8279,6 +8289,16 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
tree truth_type,
rl_and_mask = sign;
   else
rl_and_mask &= sign;
+  if (r_xor)
+   {
+ if (!rr_and_mask.get_precision ())
+   rr_and_mask = sign;
+ else
+   rr_and_mask &= sign;
+ if (r_const.get_precision ())
+   r_const &= wide_int::from (rr_and_mask,
+  r_const.get_precision (), UNSIGNED);
+   }
 }
 
   /* If either comparison code is not correct for our logical operation,
diff --git a/gcc/testsuite/gcc.dg/field-merge-20.c 
b/gcc/testsuite/gcc.dg/field-merge-20.c
new file mode 100644
index ..44ac7fae50dc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-20.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O1" } */
+
+/* tree-optimization/118409 */
+
+/* Check that tests involving a sign bit of a storage unit are handled
+   correctly.  The compares are turned into xor tests by earlier passes, and
+   ifcombine has to propagate the sign bit mask to the right hand of the
+   compare extracted from the xor, otherwise we'll retain unwanted bits for the
+   compare.  */
+
+typedef struct {
+int p : __CHAR_BIT__;
+int d : 1;
+int b : __CHAR_BIT__ - 2;
+int e : 1;
+} g;
+
+g a = {.d = 1, .e = 1}, c = {.b = 1, .d = 1, .e = 1};
+
+__attribute__((noipa))
+int f1 ()
+{
+  if (a.d == c.d
+  && a.e == c.e)
+return 0;
+  return -1;
+}
+
+__attribute__((noipa))
+int f2 ()
+{
+  if (a.d != c.d
+  || a.e != c.e)
+return -1;
+  return 0;
+}
+
+__attribute__((noipa))
+int f3 ()
+{
+  if (c.d == a.d
+  && c.e == a.e)
+return 0;
+  return -1;
+}
+
+__attribute__((noipa))
+int f4 ()
+{
+  if (c.d != a.d
+  || c.e != a.e)
+return -1;
+  return 0;
+}
+
+int main() {
+  if (f1 () < 0
+  || f2 () < 0
+  || f3 () < 0
+  || f4 () < 0)
+__builtin_abort();
+  return 0;
+}


[gcc/aoliva/heads/testme] [ifcombine] propagate signbit mask to XOR right-hand operan

2025-01-12 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 7f21e6769701... [ifcombine] propagate signbit mask to XOR right-hand operan

It previously pointed to:

 cc2aaa9ac0d3... [ifcombine] propagate signbit mask to xor right-hand operan

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  cc2aaa9... [ifcombine] propagate signbit mask to xor right-hand operan


Summary of changes (added commits):
---

  7f21e67... [ifcombine] propagate signbit mask to XOR right-hand operan


[gcc/aoliva/heads/testme] (167 commits) [ifcombine] propagate signbit mask to xor right-hand operan

2025-01-12 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 cc2aaa9ac0d3... [ifcombine] propagate signbit mask to xor right-hand operan

It previously pointed to:

 f419ad18d1ba... [ifcombine] drop other misuses of uniform_integer_cst_p

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  f419ad1... [ifcombine] drop other misuses of uniform_integer_cst_p
  98ead44... [ifcombine] fix mask variable test to match use [PR118344]
  b88da51... [ifcombine] reuse left-hand mask to decode right-hand xor o
  1a5cf10... [ifcombine] adjust for narrowing converts before shifts [PR
  41dd4aa... testsuite: generalized field-merge tests for <32-bit int [P
  fe67e1e... testsuite: generalize ifcombine field-merge tests [PR118025
  aa7a47f... ifcombine field-merge: improve handling of dwords
  d19ac11... [testsuite] rearrange requirements for dfp bitint run tests


Summary of changes (added commits):
---

  cc2aaa9... [ifcombine] propagate signbit mask to xor right-hand operan
  47ac6ca... [ifcombine] drop other misuses of uniform_integer_cst_p (*)
  fd4e979... [ifcombine] fix mask variable test to match use [PR118344] (*)
  740c849... [ifcombine] reuse left-hand mask to decode right-hand xor o (*)
  c96a6c2... [ifcombine] adjust for narrowing converts before shifts [PR (*)
  d3c91b0... testsuite: generalized field-merge tests for <32-bit int [P (*)
  261ffe6... testsuite: generalize ifcombine field-merge tests [PR118025 (*)
  38401c5... ifcombine field-merge: improve handling of dwords (*)
  d019ab4... ipa-cp: Fold-convert values when necessary (PR 118138) (*)
  86175a6... nvptx: Add '__builtin_frame_address(0)' test case (*)
  91dec10... nvptx: Add '__builtin_stack_address()' test case (*)
  f447c3c... testsuite: arm: Use -std=c17 and effective-target arm_arch_ (*)
  3ff216b... ada: Incorrect accessibilty level for library level subprog (*)
  c92f9f0... ada: Remove empty line. (*)
  c43a533... ada: Set syntactic node properties immediately when crating (*)
  8c850dd... ada: Turn Is_Effective_Use_Clause from syntactic to semanti (*)
  2b27522... ada: Reorder syntactic node fields to match the Ada RM gram (*)
  38a13ea... c++: Fix up ICEs on constexpr inline asm strings in templat (*)
  933f0c2... c++: Fix up modules handling of namespace scope structured  (*)
  f5e488c... fortran: use_iso_fortran_env_module tweaks [PR118337] (*)
  bd28244... c++: improve some modules comments (*)
  6fe3950... c++: modules, generic lambda, constexpr if (*)
  e8a5788... LoongArch: Opitmize the cost of vec_construct. (*)
  979ca3b... Daily bump. (*)
  2d0f345... RISC-V: testsuite: fix target selector for sync_char_short (*)
  08b6e87... AArch64: Fix costing of emulated gathers/scatters [PR118188 (*)
  fab96de... [PR118017][LRA]: Don't inherit reg of non-uniform reg class (*)
  3cae3a8... c++: be permissive about eh spec mismatch for op new (*)
  424a9ac... testsuite: arm: Fix typo in gcc.target/arm/armv8_2-fp16-con (*)
  8e41205... s390: Add testcase for just fixed PR118362 (*)
  21571cd... c: Restore warning for incomplete structures declared in pa (*)
  681934a... testsuite: arm: Use -Os in memset-inline-8* tests (*)
  794f672... testsuite: arm: Verify asm per function for armv8_2-fp16-co (*)
  c6b5430... c, c++: preserve type name in conversion [PR116060] (*)
  04f4ac9... testsuite: Require trampolines for gcc.dg/pr118325.c (*)
  2f31819... s390: Fix s390_constant_via_vgbm_p() [PR118362] (*)
  ca79349... c++: ICE during requires-expr partial subst [PR118060] (*)
  27d620d... c++: tf_partial and instantiate_template [PR117887] (*)
  76d1061... c++: constexpr potentiality of CAST_EXPR [PR117925] (*)
  eeedc54... c++: relax ICE for unexpected trees during constexpr [PR117 (*)
  57904dc... c++: current inst w/ indirect dependent bases [PR117993] (*)
  40f0f6a... c++: template-id dependence wrt local static arg [PR117792] (*)
  8231019... arm: [MVE intrinsics] Another fix for moves of tuples (PR t (*)
  310c8a6... 'git mv gcc/testsuite/gcc.dg/{,torture/}crc-linux-3.c' (*)
  3861d36... nvptx: PTX 'alloca' for '-mptx=7.3'+, '-march=sm_52'+ [PR65 (*)
  1db025c... Avoid PHI node re-allocation in loop copying (*)
  3b69427... ada: Fix missing detection of late equality operator return (*)
  f622acc... ada: Accept predefined multiply operator for fixed point in (*)
  d107140... Fortran: Cylce detection for non vtypes only. [PR118337] (*)
  14879ba... ree: Skip extension on fixed register (*)
  659b70b... ada: Error on Disable_Controlled aspect in Multiway_Trees (*)
  aa086b7... ada: Cleanup preanalysis of static expressions (part 3) (*)
  2cbd440... match.pd: Avoid introducing UB in the a r<< (32-b) -> a r>> (*)
  c5e71d2... fortran: Accept "15" modules for compatibility [PR118337] (*)
  b37628e... i386: Remove not used model number for Diamond Rapids (*)
  00b77db... RISC-V: Refine registered_functions list for rvv overloaded 

[gcc/aoliva/heads/testbase] (166 commits) [ifcombine] drop other misuses of uniform_integer_cst_p

2025-01-12 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testbase' was updated to point to:

 47ac6ca9cb08... [ifcombine] drop other misuses of uniform_integer_cst_p

It previously pointed to:

 1b1a33f76879... expand: drop stack adjustments after barrier [PR118006]

Diff:

Summary of changes (added commits):
---

  47ac6ca... [ifcombine] drop other misuses of uniform_integer_cst_p (*)
  fd4e979... [ifcombine] fix mask variable test to match use [PR118344] (*)
  740c849... [ifcombine] reuse left-hand mask to decode right-hand xor o (*)
  c96a6c2... [ifcombine] adjust for narrowing converts before shifts [PR (*)
  d3c91b0... testsuite: generalized field-merge tests for <32-bit int [P (*)
  261ffe6... testsuite: generalize ifcombine field-merge tests [PR118025 (*)
  38401c5... ifcombine field-merge: improve handling of dwords (*)
  d019ab4... ipa-cp: Fold-convert values when necessary (PR 118138) (*)
  86175a6... nvptx: Add '__builtin_frame_address(0)' test case (*)
  91dec10... nvptx: Add '__builtin_stack_address()' test case (*)
  f447c3c... testsuite: arm: Use -std=c17 and effective-target arm_arch_ (*)
  3ff216b... ada: Incorrect accessibilty level for library level subprog (*)
  c92f9f0... ada: Remove empty line. (*)
  c43a533... ada: Set syntactic node properties immediately when crating (*)
  8c850dd... ada: Turn Is_Effective_Use_Clause from syntactic to semanti (*)
  2b27522... ada: Reorder syntactic node fields to match the Ada RM gram (*)
  38a13ea... c++: Fix up ICEs on constexpr inline asm strings in templat (*)
  933f0c2... c++: Fix up modules handling of namespace scope structured  (*)
  f5e488c... fortran: use_iso_fortran_env_module tweaks [PR118337] (*)
  bd28244... c++: improve some modules comments (*)
  6fe3950... c++: modules, generic lambda, constexpr if (*)
  e8a5788... LoongArch: Opitmize the cost of vec_construct. (*)
  979ca3b... Daily bump. (*)
  2d0f345... RISC-V: testsuite: fix target selector for sync_char_short (*)
  08b6e87... AArch64: Fix costing of emulated gathers/scatters [PR118188 (*)
  fab96de... [PR118017][LRA]: Don't inherit reg of non-uniform reg class (*)
  3cae3a8... c++: be permissive about eh spec mismatch for op new (*)
  424a9ac... testsuite: arm: Fix typo in gcc.target/arm/armv8_2-fp16-con (*)
  8e41205... s390: Add testcase for just fixed PR118362 (*)
  21571cd... c: Restore warning for incomplete structures declared in pa (*)
  681934a... testsuite: arm: Use -Os in memset-inline-8* tests (*)
  794f672... testsuite: arm: Verify asm per function for armv8_2-fp16-co (*)
  c6b5430... c, c++: preserve type name in conversion [PR116060] (*)
  04f4ac9... testsuite: Require trampolines for gcc.dg/pr118325.c (*)
  2f31819... s390: Fix s390_constant_via_vgbm_p() [PR118362] (*)
  ca79349... c++: ICE during requires-expr partial subst [PR118060] (*)
  27d620d... c++: tf_partial and instantiate_template [PR117887] (*)
  76d1061... c++: constexpr potentiality of CAST_EXPR [PR117925] (*)
  eeedc54... c++: relax ICE for unexpected trees during constexpr [PR117 (*)
  57904dc... c++: current inst w/ indirect dependent bases [PR117993] (*)
  40f0f6a... c++: template-id dependence wrt local static arg [PR117792] (*)
  8231019... arm: [MVE intrinsics] Another fix for moves of tuples (PR t (*)
  310c8a6... 'git mv gcc/testsuite/gcc.dg/{,torture/}crc-linux-3.c' (*)
  3861d36... nvptx: PTX 'alloca' for '-mptx=7.3'+, '-march=sm_52'+ [PR65 (*)
  1db025c... Avoid PHI node re-allocation in loop copying (*)
  3b69427... ada: Fix missing detection of late equality operator return (*)
  f622acc... ada: Accept predefined multiply operator for fixed point in (*)
  d107140... Fortran: Cylce detection for non vtypes only. [PR118337] (*)
  14879ba... ree: Skip extension on fixed register (*)
  659b70b... ada: Error on Disable_Controlled aspect in Multiway_Trees (*)
  aa086b7... ada: Cleanup preanalysis of static expressions (part 3) (*)
  2cbd440... match.pd: Avoid introducing UB in the a r<< (32-b) -> a r>> (*)
  c5e71d2... fortran: Accept "15" modules for compatibility [PR118337] (*)
  b37628e... i386: Remove not used model number for Diamond Rapids (*)
  00b77db... RISC-V: Refine registered_functions list for rvv overloaded (*)
  1bb367b... OpenMP: declare variant's append_args + dispatch interop fi (*)
  5f61fb4... Daily bump. (*)
  b7f1686... nvptx: For '-march=sm_52' and higher, default at least to ' (*)
  ecb99f6... nvptx: Support '-mptx=7.3' (*)
  975638b... nvptx: Add effective-target 'nvptx_softstack', use for effe (*)
  e5180fb... c++: Honor complain in cp_build_function_call_vec for check (*)
  dcbd260... nvptx: Clarify that the PTX "native" stack pointer is handl (*)
  1823170... nvptx: Handle '__builtin_stack_save()' in a well-behaved wa (*)
  2116e8d... nvptx: Add '__builtin_stack_save()', '__builtin_stack_resto (*)
  678c3f0... nvptx: Add '__builtin_alloca(0)' test cases [PR65181] (*)
  36eee5a... gcc/configure: Fix check for assembler section merging supp (*)
  c42261d... c++: d

[gcc(refs/users/aoliva/heads/testme)] [ifcombine] propagate signbit mask to xor right-hand operand

2025-01-12 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:cc2aaa9ac0d31f8f4098c4276e0695afb7f63fcf

commit cc2aaa9ac0d31f8f4098c4276e0695afb7f63fcf
Author: Alexandre Oliva 
Date:   Sun Jan 12 22:16:21 2025 -0300

[ifcombine] propagate signbit mask to xor right-hand operand

Diff:
---
 gcc/gimple-fold.cc| 20 
 gcc/testsuite/gcc.dg/field-merge-20.c | 44 +++
 2 files changed, 64 insertions(+)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index a3987c4590ae..93ed8b3abb05 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8270,6 +8270,16 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
tree truth_type,
ll_and_mask = sign;
   else
ll_and_mask &= sign;
+  if (l_xor)
+   {
+ if (!lr_and_mask.get_precision ())
+   lr_and_mask = sign;
+ else
+   lr_and_mask &= sign;
+ if (l_const.get_precision ())
+   l_const &= wide_int::from (lr_and_mask,
+  l_const.get_precision (), UNSIGNED);
+   }
 }
 
   if (rsignbit)
@@ -8279,6 +8289,16 @@ fold_truth_andor_for_ifcombine (enum tree_code code, 
tree truth_type,
rl_and_mask = sign;
   else
rl_and_mask &= sign;
+  if (r_xor)
+   {
+ if (!rr_and_mask.get_precision ())
+   rr_and_mask = sign;
+ else
+   rr_and_mask &= sign;
+ if (r_const.get_precision ())
+   r_const &= wide_int::from (rr_and_mask,
+  r_const.get_precision (), UNSIGNED);
+   }
 }
 
   /* If either comparison code is not correct for our logical operation,
diff --git a/gcc/testsuite/gcc.dg/field-merge-20.c 
b/gcc/testsuite/gcc.dg/field-merge-20.c
new file mode 100644
index ..3c1ec0cbd80f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-20.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O1" } */
+
+/* tree-optimization/118409 */
+
+/* Check that tests involving a sign bit of a storage unit are handled
+   correctly.  The compares are turned into xor tests by earlier passes, and 
ifcombine has to propagate the sign bit mask to the right hand of the compare 
extracted from the */
+
+typedef struct {
+int p : __CHAR_BIT__;
+int d : 1;
+int b : __CHAR_BIT__ - 2;
+int e : 1;
+int f;
+} g;
+
+g a = {.d = 1, .e = 1}, c = {.b = 1, .d = 1, .e = 1};
+
+__attribute__((noipa))
+int f1 ()
+{
+  if (a.d == c.d
+  && a.e == c.e
+  && a.f == 0)
+return 0;
+  return -1;
+}
+
+__attribute__((noipa))
+int f2 ()
+{
+  if (a.d != c.d
+  || a.e != c.e
+  || a.f != 0)
+return -1;
+  return 0;
+}
+
+int main() {
+  if (f1 () < 0
+  || f2 () < 0)
+__builtin_abort();
+  return 0;
+}


[gcc r15-6844] Refactor ix86_expand_vecop_qihi2.

2025-01-12 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:0e05b793fba2a9bea9f0fbb1f068679f5dadf514

commit r15-6844-g0e05b793fba2a9bea9f0fbb1f068679f5dadf514
Author: liuhongt 
Date:   Wed Jan 8 23:11:17 2025 -0800

Refactor ix86_expand_vecop_qihi2.

Since using vpermq causes a regression and that path is already manually
disabled by !TARGET_AVX512BW, remove the code related to vpermq and make
ix86_expand_vecop_qihi2 only handle the vpmovbw + op + vpmovwb case.

gcc/ChangeLog:

* config/i386/i386-expand.cc (ix86_expand_vecop_qihi2):
Refactor to avoid redundant TARGET_AVX512BW in many places.

Diff:
---
 gcc/config/i386/i386-expand.cc | 39 +--
 1 file changed, 5 insertions(+), 34 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 2ab57874234b..da030832bba7 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -24864,11 +24864,9 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, 
rtx op1, rtx op2)
  generic permutation to merge the data back into the right place.  This
  permutation results in VPERMQ, which is slow, so better fall back to
  ix86_expand_vecop_qihi.  */
-  if (!TARGET_AVX512BW)
-return false;
-
-  if ((qimode == V16QImode && !TARGET_AVX2)
-  || (qimode == V32QImode && (!TARGET_AVX512BW || !TARGET_EVEX512))
+  if (!TARGET_AVX512BW
+  || (qimode == V16QImode && !TARGET_AVX512VL)
+  || (qimode == V32QImode && !TARGET_EVEX512)
   /* There are no V64HImode instructions.  */
   || qimode == V64QImode)
  return false;
@@ -24883,8 +24881,7 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, 
rtx op1, rtx op2)
 {
 case E_V16QImode:
   himode = V16HImode;
-  if (TARGET_AVX512VL && TARGET_AVX512BW)
-   gen_truncate = gen_truncv16hiv16qi2;
+  gen_truncate = gen_truncv16hiv16qi2;
   break;
 case E_V32QImode:
   himode = V32HImode;
@@ -24926,33 +24923,7 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, 
rtx op1, rtx op2)
 hdest = expand_simple_binop (himode, code, hop1, hop2,
 NULL_RTX, 1, OPTAB_DIRECT);
 
-  if (gen_truncate)
-emit_insn (gen_truncate (dest, hdest));
-  else
-{
-  struct expand_vec_perm_d d;
-  rtx wqdest = gen_reg_rtx (wqimode);
-  rtx wqres = gen_lowpart (wqimode, hdest);
-  bool ok;
-  int i;
-
-  /* Merge the data back into the right place.  */
-  d.target = wqdest;
-  d.op0 = d.op1 = wqres;
-  d.vmode = wqimode;
-  d.nelt = GET_MODE_NUNITS (wqimode);
-  d.one_operand_p = false;
-  d.testing_p = false;
-
-  for (i = 0; i < d.nelt; ++i)
-   d.perm[i] = i * 2;
-
-  ok = ix86_expand_vec_perm_const_1 (&d);
-  gcc_assert (ok);
-
-  emit_move_insn (dest, gen_lowpart (qimode, wqdest));
-}
-
+  emit_insn (gen_truncate (dest, hdest));
   return true;
 }


[gcc r15-6843] [PATCH] crc: Fix up some crc related wrong code issues [PR117997, PR118415]

2025-01-12 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:9c387a99a911724546abe99ecd39bfc968ed6333

commit r15-6843-g9c387a99a911724546abe99ecd39bfc968ed6333
Author: Jakub Jelinek 
Date:   Sun Jan 12 17:24:53 2025 -0700

[PATCH] crc: Fix up some crc related wrong code issues [PR117997, PR118415]

Hi!

As mentioned in the second PR, using table names like
crc_table_for_crc_8_polynomial_0x12
in the user namespace is wrong, user could have defined such variables
in their code and as can be seen on the last testcase, then it just
misbehaves.
At minimum such names should start with 2 underscores, moving it into
implementation namespace, and if possible have some dot or dollar in the
name if target supports it.
I think assemble_crc_table right now always emits tables as local variables,
I really don't see what would be setting TREE_PUBLIC flag on
IDENTIFIER_NODEs.
It might be nice to share the tables between TUs in the same binary or
shared library, but it in that case should have hidden visibility if
possible, so that it isn't exported from the libraries or binaries, we don't
want the optimization to affect set of exported symbols from libraries.
And, as can be seen in the first PR, building gen_rtx_SYMBOL_REF by hand
is certainly unexpected on some targets, e.g. those which use
-fsection-anchors, so we should instead use DECL_RTL of the VAR_DECL.
For that we'd need to look it up if we haven't emitted it already, while
IDENTIFIER_NODEs can be looked up easily, I guess for the VAR_DECLs we'd
need custom hash table.

Now, all of the above (except sharing between multiple TUs) is already
implemented in output_constant_def, so I think it is much better to just
use that function.

And, if we want to share it between multiple TUs, we could extend the
SHF_MERGE usage in gcc, currently we only use it for constant pool
entries with same size as alignment, from 1 to 32 bytes, using .rodata.cstN
sections.  We could just use say .rodata.cstM.N sections where M would be
alignment and N would be the entity size.  We could use that for all
constant pool entries say up to 2048 bytes.
Though, as the current code doesn't share between multiple TUs, I think it
can be done incrementally (either still for GCC 15, or GCC 16+).

Bootstrapped/regtested on {x86_64,i686,aarch64,powerpc64le,s390x}-linux, on
aarch64 it also fixes
-FAIL: crypto/rsa
-FAIL: hash
ok for trunk?

gcc/
PR tree-optimization/117997
PR middle-end/118415
* expr.cc (assemble_crc_table): Make static, remove id argument,
use output_constant_def.  Emit note if -fdump-rtl-expand-details
about which table has been emitted.
(generate_crc_table): Make static, adjust assemble_crc_table
caller, call it always.
(calculate_table_based_CRC): Make static.
* internal-fn.cc (expand_crc_optab_fn): Emit note if
-fdump-rtl-expand-details about using optab for crc.  Formatting fix.

gcc/testsuite/
* gcc.dg/crc-builtin-target32.c: Add -fdump-rtl-expand-details
as dg-additional-options.  Scan expand dump rather than assembly,
adjust the regexps.
* gcc.dg/crc-builtin-target64.c: Likewise.
* gcc.dg/crc-builtin-rev-target32.c: Likewise.
* gcc.dg/crc-builtin-rev-target64.c: Likewise.
* gcc.dg/pr117997.c: New test.
* gcc.dg/pr118415.c: New test.

Diff:
---
 gcc/expr.cc |  56 
 gcc/internal-fn.cc  |  12 ++-
 gcc/testsuite/gcc.dg/crc-builtin-rev-target32.c |   7 +-
 gcc/testsuite/gcc.dg/crc-builtin-rev-target64.c |   7 +-
 gcc/testsuite/gcc.dg/crc-builtin-target32.c |   7 +-
 gcc/testsuite/gcc.dg/crc-builtin-target64.c |   8 +-
 gcc/testsuite/gcc.dg/pr117997.c | 112 
 gcc/testsuite/gcc.dg/pr118415.c |  25 ++
 8 files changed, 181 insertions(+), 53 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 235e79546113..07fc85712e6b 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -14247,25 +14247,16 @@ calculate_crc (unsigned HOST_WIDE_INT crc,
   return crc;
 }
 
-/* Assemble CRC table with 256 elements for the given POLYNOM and CRC_BITS with
-   given ID.
-   ID is the identifier of the table, the name of the table is unique,
-   contains CRC size and the polynomial.
+/* Assemble CRC table with 256 elements for the given POLYNOM and CRC_BITS.
POLYNOM is the polynomial used to calculate the CRC table's elements.
CRC_BITS is the size of CRC, may be 8, 16, ... . */
 
-rtx
-assemble_crc_table (tree id, unsigned HOST_WIDE_INT polynom,
-   unsigned short crc_bits)
+static rtx
+assemble_crc_table (unsigned HOST_WIDE_INT polynom, unsigned short crc_bits)
 {
   unsigned tabl

[gcc r13-9309] Zen5 tuning part 5: update instruction latencies in x86-tune-costs

2025-01-12 Thread Jan Hubicka via Gcc-cvs
https://gcc.gnu.org/g:f10d381dfc983ea32e5f72faadc7eb8126f114f6

commit r13-9309-gf10d381dfc983ea32e5f72faadc7eb8126f114f6
Author: Jan Hubicka 
Date:   Wed Sep 4 09:19:08 2024 +0200

Zen5 tuning part 5: update instruction latencies in x86-tune-costs

There is nothing exciting in this patch.  I measured latencies and also
compared them with the newly released optimization guide.  There are no
dramatic changes compared to zen4.  One interesting new bit is that addss
is faster and can be 2 cycles when fed by another addss.

I also increased the large insn bound since decoders seems no longer require
instructions to be 8 bytes or less.

gcc/ChangeLog:

* config/i386/x86-tune-costs.h (znver5_cost): Update instruction
costs.

(cherry picked from commit 4292297a0f938ffc953422fa246ff00fe345fe3d)

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 28 +---
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index b89ac640ea5f..9edc6e36557d 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2034,6 +2034,7 @@ struct processor_costs znver5_cost = {
   COSTS_N_INSNS (1),   /* cost of a lea instruction.  */
   COSTS_N_INSNS (1),   /* variable shift costs.  */
   COSTS_N_INSNS (1),   /* constant shift costs.  */
+  /* mul has latency 3, executes in 3 integer units.  */
   {COSTS_N_INSNS (3),  /* cost of starting multiply for QI.  */
COSTS_N_INSNS (3),  /*   HI.  */
COSTS_N_INSNS (3),  /*   SI.  */
@@ -2041,6 +2042,8 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (3)}, /*  other.  */
   0,   /* cost of multiply per each bit
   set.  */
+  /* integer divide has latency of 8 cycles
+ plus 1 for every 9 bits of quotient.  */
   {COSTS_N_INSNS (10), /* cost of a divide/mod for QI.  */
COSTS_N_INSNS (11), /*  HI.  */
COSTS_N_INSNS (13), /*  SI.  */
@@ -2048,7 +2051,7 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (16)},/*  
other.  */
   COSTS_N_INSNS (1),   /* cost of movsx.  */
   COSTS_N_INSNS (1),   /* cost of movzx.  */
-  8,   /* "large" insn.  */
+  15,  /* "large" insn.  */
   9,   /* MOVE_RATIO.  */
   6,   /* CLEAR_RATIO */
   {6, 6, 6},   /* cost of loading integer registers
@@ -2065,12 +2068,13 @@ struct processor_costs znver5_cost = {
   2, 2, 2, /* cost of moving XMM,YMM,ZMM
   register.  */
   6,   /* cost of moving SSE register to 
integer.  */
-  /* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops,
- throughput 5.  Approx 7 uops do not depend on vector size and every load
- is 5 uops.  */
+
+  /* TODO: gather and scatter instructions are currently disabled in
+ x86-tune.def.  In some cases they are however a win, see PR116582
+ We however need good cost model for them.  */
   14, 10,  /* Gather load static, per_elt.  */
   14, 20,  /* Gather store static, per_elt.  */
-  32,  /* size of l1 cache.  */
+  48,  /* size of l1 cache.  */
   1024,/* size of l2 cache.  */
   64,  /* size of prefetch block.  */
   /* New AMD processors never drop prefetches; if they cannot be performed
@@ -2080,6 +2084,8 @@ struct processor_costs znver5_cost = {
  time).  */
   100, /* number of parallel prefetches.  */
   3,   /* Branch cost.  */
+  /* TODO x87 latencies are still based on znver4.
+ Probably not very important these days.  */
   COSTS_N_INSNS (7),   /* cost of FADD and FSUB insns.  */
   COSTS_N_INSNS (7),   /* cost of FMUL instruction.  */
   /* Latency of fdiv is 8-15.  */
@@ -2089,16 +2095,24 @@ struct processor_costs znver5_cost = {
   /* Latency of fsqrt is 4-10.  */
   COSTS_N_INSNS (25),  /* cost of FSQRT instruction.  */
 
+  /* SSE instructions have typical throughput 4 and latency 1.  */
   COSTS_N_INSNS (1),   /* cost of cheap SSE instruction.  */
-  COSTS_N_INSNS (3),   /* cost of ADDSS/SD SUBSS/SD insns.  *

[gcc r15-6840] Dump all symbol attributes in show_attr.

2025-01-12 Thread Thomas König via Gcc-cvs
https://gcc.gnu.org/g:f4fa0b7d493a4ba217d989d3df75bbe3730874fc

commit r15-6840-gf4fa0b7d493a4ba217d989d3df75bbe3730874fc
Author: Thomas Koenig 
Date:   Sun Jan 12 23:02:34 2025 +0100

Dump all symbol attributes in show_attr.

gcc/fortran/ChangeLog:

* dump-parse-tree.cc (show_attr): Dump all symbol attributes.

Diff:
---
 gcc/fortran/dump-parse-tree.cc | 108 -
 1 file changed, 107 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc
index 8d31ddfcffb3..97cab3f85f92 100644
--- a/gcc/fortran/dump-parse-tree.cc
+++ b/gcc/fortran/dump-parse-tree.cc
@@ -835,6 +835,8 @@ show_attr (symbol_attribute *attr, const char * module)
 fputs (" VOLATILE", dumpfile);
   if (attr->threadprivate)
 fputs (" THREADPRIVATE", dumpfile);
+  if (attr->temporary)
+fputs (" TEMPORARY", dumpfile);
   if (attr->target)
 fputs (" TARGET", dumpfile);
   if (attr->dummy)
@@ -868,6 +870,8 @@ show_attr (symbol_attribute *attr, const char * module)
 fputs (" IN-NAMELIST", dumpfile);
   if (attr->in_common)
 fputs (" IN-COMMON", dumpfile);
+  if (attr->in_equivalence)
+fputs (" IN_EQUIVALENDE", dumpfile);
 
   if (attr->abstract)
 fputs (" ABSTRACT", dumpfile);
@@ -926,6 +930,47 @@ show_attr (symbol_attribute *attr, const char * module)
 fputs (" OMP-DECLARE-TARGET-LINK", dumpfile);
   if (attr->omp_declare_target_indirect)
 fputs (" OMP-DECLARE-TARGET-INDIRECT", dumpfile);
+  if (attr->omp_device_type == OMP_DEVICE_TYPE_HOST)
+fputs (" OMP-DEVICE-TYPE-HOST", dumpfile);
+  if (attr->omp_device_type == OMP_DEVICE_TYPE_NOHOST)
+fputs (" OMP-DEVICE-TYPE-NOHOST", dumpfile);
+  if (attr->omp_device_type == OMP_DEVICE_TYPE_ANY)
+fputs (" OMP-DEVICE-TYPE-ANY", dumpfile);
+  if (attr->omp_allocate)
+fputs (" OMP-ALLOCATE", dumpfile);
+
+  if (attr->oacc_declare_create)
+fputs (" OACC-DECLARE-CREATE", dumpfile);
+  if (attr->oacc_declare_copyin)
+fputs (" OACC-DECLARE-COPYIN", dumpfile);
+  if (attr->oacc_declare_deviceptr)
+fputs (" OACC-DECLARE-DEVICEPTR", dumpfile);
+  if (attr->oacc_declare_device_resident)
+fputs (" OACC-DECLARE-DEVICE-RESIDENT", dumpfile);
+
+  switch (attr->oacc_routine_lop)
+{
+case OACC_ROUTINE_LOP_NONE:
+case OACC_ROUTINE_LOP_ERROR:
+  break;
+
+case OACC_ROUTINE_LOP_GANG:
+  fputs (" OACC-ROUTINE-LOP-GANG", dumpfile);
+  break;
+
+case OACC_ROUTINE_LOP_WORKER:
+  fputs (" OACC-ROUTINE-LOP-WORKER", dumpfile);
+  break;
+
+case  OACC_ROUTINE_LOP_VECTOR:
+  fputs (" OACC-ROUTINE-LOP-VECTOR", dumpfile);
+  break;
+
+case OACC_ROUTINE_LOP_SEQ:
+  fputs (" OACC-ROUTINE-LOP-SEQ", dumpfile);
+  break;
+  }
+
   if (attr->elemental)
 fputs (" ELEMENTAL", dumpfile);
   if (attr->pure)
@@ -956,8 +1001,69 @@ show_attr (symbol_attribute *attr, const char * module)
 fputs (" IS-MAIN-PROGRAM", dumpfile);
   if (attr->oacc_routine_nohost)
 fputs (" OACC-ROUTINE-NOHOST", dumpfile);
+  if (attr->temporary)
+fputs (" TEMPORARY", dumpfile);
+  if (attr->assign)
+fputs (" ASSIGN", dumpfile);
+  if (attr->not_always_present)
+fputs (" NOT-ALWAYS-PRESENT", dumpfile);
+  if (attr->implied_index)
+fputs (" IMPLIED-INDEX", dumpfile);
+  if (attr->proc_pointer)
+fputs (" PROC-POINTER", dumpfile);
+  if (attr->fe_temp)
+fputs (" FE-TEMP", dumpfile);
+  if (attr->automatic)
+fputs (" AUTOMATIC", dumpfile);
+  if (attr->class_pointer)
+fputs (" CLASS-POINTER", dumpfile);
+  if (attr->save == SAVE_EXPLICIT)
+fputs (" SAVE-EXPLICIT", dumpfile);
+  if (attr->save == SAVE_IMPLICIT)
+fputs (" SAVE-IMPLICIT", dumpfile);
+  if (attr->used_in_submodule)
+fputs (" USED-IN-SUBMODULE", dumpfile);
+  if (attr->use_only)
+fputs (" USE-ONLY", dumpfile);
+  if (attr->use_rename)
+fputs (" USE-RENAME", dumpfile);
+  if (attr->imported)
+fputs (" IMPORTED", dumpfile);
+  if (attr->host_assoc)
+fputs (" HOST-ASSOC", dumpfile);
+  if (attr->generic)
+fputs (" GENERIC", dumpfile);
+  if (attr->generic_copy)
+fputs (" GENERIC-COPY", dumpfile);
+  if (attr->untyped)
+fputs (" UNTYPED", dumpfile);
+  if (attr->extension)
+fprintf (dumpfile, " EXTENSION(%u)", attr->extension);
+  if (attr->is_class)
+fputs (" IS-CLASS", dumpfile);
+  if (attr->class_ok)
+fputs (" CLASS-OK", dumpfile);
+  if (attr->vtab)
+fputs (" VTAB", dumpfile);
+  if (attr->vtype)
+fputs (" VTYPE", dumpfile);
+  if (attr->module_procedure)
+fputs (" MODULE-PROCEDURE", dumpfile);
+  if (attr->if_source == IFSRC_DECL)
+fputs (" IFSRC-DECL", dumpfile);
+  if (attr->if_source == IFSRC_IFBODY)
+fputs (" IFSRC-IFBODY", dumpfile);
+
+  for (int i = 0; i < EXT_ATTR_LAST; i++)
+{
+  if (attr->ext_attr & (1 << i))
+   {
+ fputs (" ATTRIBUTE-", dumpfile);
+ for (const char *p = ext_attr_list[i].name; p && *p; p++)
+