[gcc r15-6830] More memory leak fixes
https://gcc.gnu.org/g:bedf26c201894d340a4eb61927879fd0d82e3102 commit r15-6830-gbedf26c201894d340a4eb61927879fd0d82e3102 Author: Richard Biener Date: Fri Jan 10 16:25:35 2025 +0100 More memory leak fixes The following were found compiling SPEC CPU 2017 with valgrind. * tree-vect-slp.cc (vect_analyze_slp): Release saved_stmts vector. (vect_build_slp_tree_2): Release new_oprnds_info when not used. (vect_analyze_slp): Release root_stmts when gcond SLP build fails. Diff: --- gcc/tree-vect-slp.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 337506419d92..02e7f5c4d587 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -2678,6 +2678,8 @@ out: nops = 1; has_two_operators_perm = true; } + else + vect_free_oprnd_info (new_oprnds_info); } auto_vec children; @@ -4951,8 +4953,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, max_tree_size, &limit, bst_map, NULL, force_single_lane); } - saved_stmts.release (); } + saved_stmts.release (); } /* Make sure to vectorize only-live stmts, usually inductions. */ @@ -5013,10 +5015,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, stmts.create (1); stmts.quick_push (vect_stmt_to_vectorize (varg)); - vect_build_slp_instance (vinfo, slp_inst_kind_gcond, - stmts, roots, remain, - max_tree_size, &limit, - bst_map, NULL, force_single_lane); + if (! vect_build_slp_instance (vinfo, slp_inst_kind_gcond, +stmts, roots, remain, +max_tree_size, &limit, +bst_map, NULL, force_single_lane)) + roots.release (); } /* Find and create slp instances for inductions that have been forced
[gcc r15-6831] Fix union member access for EXEC_INQUIRE.
https://gcc.gnu.org/g:40754a3b9bef83bf4da0675fcb378e8cd1675602 commit r15-6831-g40754a3b9bef83bf4da0675fcb378e8cd1675602 Author: Thomas Koenig Date: Sun Jan 12 13:05:25 2025 +0100 Fix union member access for EXEC_INQUIRE. gcc/fortran/ChangeLog: PR fortran/118432 * frontend-passes.cc (doloop_code): Select correct member of co->ext.union for inquire. Diff: --- gcc/fortran/frontend-passes.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/fortran/frontend-passes.cc b/gcc/fortran/frontend-passes.cc index 3a3328d44508..6ee6ce4c3ff1 100644 --- a/gcc/fortran/frontend-passes.cc +++ b/gcc/fortran/frontend-passes.cc @@ -2552,7 +2552,7 @@ doloop_code (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, break; case EXEC_INQUIRE: - if (co->ext.filepos->err) + if (co->ext.inquire->err) seen_goto = true; break;
[gcc r15-6833] Alpha: Restore frame pointer last in `builtin_longjmp' [PR64242]
https://gcc.gnu.org/g:3cf0e6ab2aa9e7cb9a406079ff19856a6461d9f0 commit r15-6833-g3cf0e6ab2aa9e7cb9a406079ff19856a6461d9f0 Author: Maciej W. Rozycki Date: Sun Jan 12 16:48:53 2025 + Alpha: Restore frame pointer last in `builtin_longjmp' [PR64242] Add similar arrangements to `builtin_longjmp' for Alpha as with commit 71b144289c1c ("re PR middle-end/64242 (Longjmp expansion incorrect)") and commit 511ed59d0b04 ("Fix PR64242 - Longjmp expansion incorrect"), so as to restore the frame pointer last, so that accesses to a local buffer supplied can still be fulfilled with memory accesses via the original frame pointer, fixing: FAIL: gcc.c-torture/execute/pr64242.c -O0 execution test FAIL: gcc.c-torture/execute/pr64242.c -O1 execution test FAIL: gcc.c-torture/execute/pr64242.c -O2 execution test FAIL: gcc.c-torture/execute/pr64242.c -O3 -g execution test FAIL: gcc.c-torture/execute/pr64242.c -Os execution test FAIL: gcc.c-torture/execute/pr64242.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test FAIL: gcc.c-torture/execute/pr64242.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects execution test and adding no regressions in `alpha-linux-gnu' testing. gcc/ PR middle-end/64242 * config/alpha/alpha.md (`builtin_longjmp'): Restore frame pointer last. Add frame clobber and schedule blockage. Diff: --- gcc/config/alpha/alpha.md | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index 35c8030422f5..178ce992206d 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -5005,14 +5005,28 @@ rtx pv = gen_rtx_REG (Pmode, 27); /* This bit is the same as expand_builtin_longjmp. */ + emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx)); - emit_move_insn (hard_frame_pointer_rtx, fp); + emit_move_insn (pv, lab); + + /* Restore the frame pointer and stack pointer. 
We must use a + temporary since the setjmp buffer may be a local. */ + fp = copy_to_reg (fp); emit_stack_restore (SAVE_NONLOCAL, stack); + + /* Ensure the frame pointer move is not optimized. */ + emit_insn (gen_blockage ()); + emit_clobber (hard_frame_pointer_rtx); + emit_clobber (frame_pointer_rtx); + emit_move_insn (hard_frame_pointer_rtx, fp); + emit_use (hard_frame_pointer_rtx); emit_use (stack_pointer_rtx); + /* End of the bit corresponding to expand_builtin_longjmp. */ + /* Load the label we are jumping through into $27 so that we know where to look for it when we get back to setjmp's function for restoring the gp. */
[gcc r15-6834] Alpha: Always respect -mbwx, -mcix, -mfix, -mmax, and their inverse
https://gcc.gnu.org/g:19fdb9f3792d4c3c9ff3d18dc4566bb16e62de60 commit r15-6834-g19fdb9f3792d4c3c9ff3d18dc4566bb16e62de60 Author: Maciej W. Rozycki Date: Sun Jan 12 16:48:53 2025 +0000 Alpha: Always respect -mbwx, -mcix, -mfix, -mmax, and their inverse Contrary to the user documentation the `-mbwx', `-mcix', `-mfix', `-mmax' feature options and their inverse forms are ignored whenever the `-mcpu=' option is in effect, either by having been given explicitly or where configured as the default such as with the `alphaev56-linux-gnu' target. In the latter case there is no way to change the settings these options are supposed to tweak other than with `-mcpu=' and the settings cannot be individually controlled, making all the feature options permanently inactive. It seems a regression from commit 7816bea0e23b ("config.gcc: Reorganize --with-cpu logic.") back in 2003, which replaced the setting of the default feature mask with the setting of the default CPU across a few targets, and the complementing logic in the Alpha backend wasn't updated accordingly. Fix this by making the individual feature options take precedence over `-mcpu='. Add test cases to verify this is the case, and to cover the defaults as well for the boundary cases. This has a drawback where the order of the options is ignored between `-mcpu=' and these individual options, so e.g. `-mno-bwx -mcpu=ev6' will keep the BWX feature disabled even though `-mcpu=ev6' comes later in the command line. This may affect some scenarios involving user overrides such as with CFLAGS passed to `configure' and `make' invocations. I do believe it has been our practice anyway for more fine-grained options to override group options regardless of their relative order on the command line and in any case using `-mcpu=ev6 -mbwx' as the override will do the right thing if required, canceling any previous `-mno-bwx'. 
This has been spotted with `alphaev56-linux-gnu' target verification and a recently added test case: FAIL: gcc.target/alpha/stwx0.c -O1 scan-assembler-times \\sldq_u\\s 2 FAIL: gcc.target/alpha/stwx0.c -O1 scan-assembler-times \\smskwh\\s 1 FAIL: gcc.target/alpha/stwx0.c -O1 scan-assembler-times \\smskwl\\s 1 FAIL: gcc.target/alpha/stwx0.c -O1 scan-assembler-times \\sstq_u\\s 2 (and similarly for the remaining optimization levels covered) which this fix has addressed. gcc/ * config/alpha/alpha.cc (alpha_option_override): Ignore CPU flags corresponding to features the enabling or disabling of which has been requested with an individual feature option. gcc/testsuite/ * gcc.target/alpha/target-bwx-1.c: New file. * gcc.target/alpha/target-bwx-2.c: New file. * gcc.target/alpha/target-bwx-3.c: New file. * gcc.target/alpha/target-bwx-4.c: New file. * gcc.target/alpha/target-cix-1.c: New file. * gcc.target/alpha/target-cix-2.c: New file. * gcc.target/alpha/target-cix-3.c: New file. * gcc.target/alpha/target-cix-4.c: New file. * gcc.target/alpha/target-fix-1.c: New file. * gcc.target/alpha/target-fix-2.c: New file. * gcc.target/alpha/target-fix-3.c: New file. * gcc.target/alpha/target-fix-4.c: New file. * gcc.target/alpha/target-max-1.c: New file. * gcc.target/alpha/target-max-2.c: New file. * gcc.target/alpha/target-max-3.c: New file. * gcc.target/alpha/target-max-4.c: New file. 
Diff: --- gcc/config/alpha/alpha.cc | 5 +++-- gcc/testsuite/gcc.target/alpha/target-bwx-1.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-bwx-2.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-bwx-3.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-bwx-4.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-cix-1.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-cix-2.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-cix-3.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-cix-4.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-fix-1.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-fix-2.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-fix-3.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-fix-4.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-max-1.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-max-2.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-max-3.c | 6 ++ gcc/testsuite/gcc.target/alpha/target-max-4.c | 6 ++ 17 files changed, 99 insertions(+), 2 deletions(-) diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index 030dc7728859..958a785ffd0e 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -460,8 +460,9 @@ alpha_option_override (void)
[gcc r15-6835] Alpha: Optimize block moves coming from longword-aligned source
https://gcc.gnu.org/g:4e557210b7f9fd669ff66c6958327eb2d4262d80 commit r15-6835-g4e557210b7f9fd669ff66c6958327eb2d4262d80 Author: Maciej W. Rozycki Date: Sun Jan 12 16:48:53 2025 + Alpha: Optimize block moves coming from longword-aligned source Now that we have proper alignment determination for block moves in place the case of copying a block of longword-aligned data has become real, so implement the merging of loaded data from pairs of SImode registers into single DImode registers for the purpose of using with unaligned stores efficiently, as suggested by a comment in `alpha_expand_block_move' and discard the comment. Provide test cases accordingly. gcc/ * config/alpha/alpha.cc (alpha_expand_block_move): Merge loaded data from pairs of SImode registers into single DImode registers if to be used with unaligned stores. gcc/testsuite/ * gcc.target/alpha/memcpy-si-aligned.c: New file. * gcc.target/alpha/memcpy-si-unaligned.c: New file. * gcc.target/alpha/memcpy-si-unaligned-dst.c: New file. * gcc.target/alpha/memcpy-si-unaligned-src.c: New file. * gcc.target/alpha/memcpy-si-unaligned-src-bwx.c: New file. Diff: --- gcc/config/alpha/alpha.cc | 45 +++ gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c | 16 +++ .../gcc.target/alpha/memcpy-si-unaligned-dst.c | 16 +++ .../gcc.target/alpha/memcpy-si-unaligned-src-bwx.c | 11 + .../gcc.target/alpha/memcpy-si-unaligned-src.c | 15 +++ .../gcc.target/alpha/memcpy-si-unaligned.c | 51 ++ 6 files changed, 146 insertions(+), 8 deletions(-) diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index 958a785ffd0e..8ec9e8c5d399 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -3931,14 +3931,44 @@ alpha_expand_block_move (rtx operands[]) { words = bytes / 4; - for (i = 0; i < words; ++i) - data_regs[nregs + i] = gen_reg_rtx (SImode); + /* Load an even quantity of SImode data pieces only. 
*/ + unsigned int hwords = words / 2; + for (i = 0; i / 2 < hwords; ++i) + { + data_regs[nregs + i] = gen_reg_rtx (SImode); + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, SImode, ofs + i * 4)); + } - for (i = 0; i < words; ++i) - emit_move_insn (data_regs[nregs + i], - adjust_address (orig_src, SImode, ofs + i * 4)); + /* If we'll be using unaligned stores, merge data from pairs +of SImode registers into DImode registers so that we can +store it more efficiently via quadword unaligned stores. */ + unsigned int j; + if (dst_align < 32) + for (i = 0, j = 0; i < words / 2; ++i, j = i * 2) + { + rtx hi = expand_simple_binop (DImode, ASHIFT, + data_regs[nregs + j + 1], + GEN_INT (32), NULL_RTX, + 1, OPTAB_WIDEN); + data_regs[nregs + i] = expand_simple_binop (DImode, IOR, hi, + data_regs[nregs + j], + NULL_RTX, + 1, OPTAB_WIDEN); + } + else + j = i; - nregs += words; + /* Take care of any remaining odd trailing SImode data piece. */ + if (j < words) + { + data_regs[nregs + i] = gen_reg_rtx (SImode); + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, SImode, ofs + j * 4)); + ++i; + } + + nregs += i; bytes -= words * 4; ofs += words * 4; } @@ -4057,13 +4087,12 @@ alpha_expand_block_move (rtx operands[]) } /* Due to the above, this won't be aligned. */ - /* ??? If we have more than one of these, consider constructing full - words in registers and using alpha_expand_unaligned_store_words. 
*/ while (i < nregs && GET_MODE (data_regs[i]) == SImode) { alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); ofs += 4; i++; + gcc_assert (i == nregs || GET_MODE (data_regs[i]) != SImode); } if (dst_align >= 16) diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c b/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c new file mode 100644 index ..2572a3187e9d --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-si-aligned.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned int aligned_src_si[17] = { [0 ... 16] = 0xeaebeced }; +unsigned int aligned_dst_si[17] = { [0 ... 16] = 0xdcdbdad9 }; + +void +memcpy_aligned_data_si
[gcc r15-6836] Alpha: Fix a block move pessimisation with zero-extension after LDWU
https://gcc.gnu.org/g:ed8cd42d138fa048e0c0eff1ea28b39f5abe1c29 commit r15-6836-ged8cd42d138fa048e0c0eff1ea28b39f5abe1c29 Author: Maciej W. Rozycki Date: Sun Jan 12 16:48:54 2025 + Alpha: Fix a block move pessimisation with zero-extension after LDWU For the BWX case we have a pessimisation in `alpha_expand_block_move' for HImode loads where we place the data loaded into a HImode register as well, therefore losing information that indeed the data loaded has already been zero-extended to the full DImode width of the register. Later on when we store this data in QImode quantities into an unaligned destination, we zero-extend it again for the purpose of right-shifting, such as with the test case included producing code at `-O2' as follows: ldah $2,unaligned_src_hi($29) !gprelhigh lda $1,unaligned_src_hi($2) !gprellow ldwu $6,unaligned_src_hi($2)!gprellow ldwu $5,2($1) ldwu $4,4($1) bis $31,$31,$31 zapnot $6,3,$3 # Redundant! ldbu $7,6($1) zapnot $5,3,$2 # Redundant! stb $6,0($16) zapnot $4,3,$1 # Redundant! stb $5,2($16) srl $3,8,$3 stb $4,4($16) srl $2,8,$2 stb $3,1($16) srl $1,8,$1 stb $2,3($16) stb $1,5($16) stb $7,6($16) The non-BWX case is unaffected, because there we use byte insertion, so we don't care that data is held in a HImode register. Address this by making the holding RTX a HImode subreg of the original DImode register, which the RTL passes can then see through and eliminate the zero-extension where otherwise required, resulting in this shortened code: ldah $2,unaligned_src_hi($29) !gprelhigh lda $1,unaligned_src_hi($2) !gprellow ldwu $4,unaligned_src_hi($2)!gprellow ldwu $3,2($1) ldwu $2,4($1) bis $31,$31,$31 srl $4,8,$6 ldbu $1,6($1) srl $3,8,$5 stb $4,0($16) stb $6,1($16) srl $2,8,$4 stb $3,2($16) stb $5,3($16) stb $2,4($16) stb $4,5($16) stb $1,6($16) While at it reformat the enclosing do-while statement according to the GNU Coding Standards, observing that in this case it does not obfuscate the change owing to the odd original indentation. 
gcc/ * config/alpha/alpha.cc (alpha_expand_block_move): Use a HImode subreg of a DImode register to hold data from an aligned HImode load. Diff: --- gcc/config/alpha/alpha.cc | 17 +++-- .../gcc.target/alpha/memcpy-hi-unaligned-dst.c | 16 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index 8ec9e8c5d399..6965ece16d0b 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -3999,14 +3999,19 @@ alpha_expand_block_move (rtx operands[]) if (bytes >= 2) { if (src_align >= 16) - { - do { - data_regs[nregs++] = tmp = gen_reg_rtx (HImode); - emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); + do + { + tmp = gen_reg_rtx (DImode); + emit_move_insn (tmp, + expand_simple_unop (DImode, SET, + adjust_address (orig_src, + HImode, ofs), + NULL_RTX, 1)); + data_regs[nregs++] = gen_rtx_SUBREG (HImode, tmp, 0); bytes -= 2; ofs += 2; - } while (bytes >= 2); - } + } + while (bytes >= 2); else if (! TARGET_BWX) { data_regs[nregs++] = tmp = gen_reg_rtx (HImode); diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c b/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c new file mode 100644 index ..4e3c02f5b906 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-hi-unaligned-dst.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mbwx" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned short unaligned_src_hi[4]; + +void +memcpy_unaligned_dst_hi (void *dst) +{ + __builtin_memcpy (dst, unaligned_src_hi, 7); +} + +/* { dg-final { scan-assembler-times "\\sldwu\\s" 3 } } */ +/* { dg-final { scan-assembler-times "\\sldbu\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstb\\s" 7 } } */ +/* { dg-final { scan-assembler-not "\\szapnot\\s" } } */
[gcc r15-6832] Alpha: Add memory clobbers to `builtin_longjmp' expansion
https://gcc.gnu.org/g:46861167f548ec622918d95acd2424b64f56797d commit r15-6832-g46861167f548ec622918d95acd2424b64f56797d Author: Maciej W. Rozycki Date: Sun Jan 12 16:48:53 2025 +0000 Alpha: Add memory clobbers to `builtin_longjmp' expansion Add the same memory clobbers to `builtin_longjmp' for Alpha as with commit 41439bf6a647 ("builtins.c (expand_builtin_longjmp): Added two memory clobbers."), to prevent instructions that access memory via the frame or stack pointer from being moved across the write to the frame pointer. gcc/ * config/alpha/alpha.md (builtin_longjmp): Add memory clobbers. Diff: --- gcc/config/alpha/alpha.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index 376c4cba90c5..35c8030422f5 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -5005,6 +5005,8 @@ rtx pv = gen_rtx_REG (Pmode, 27); /* This bit is the same as expand_builtin_longjmp. */ + emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); + emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx)); emit_move_insn (hard_frame_pointer_rtx, fp); emit_move_insn (pv, lab); emit_stack_restore (SAVE_NONLOCAL, stack);
[gcc r15-6837] Fortran: implement F2018 intrinsic OUT_OF_RANGE [PR115788]
https://gcc.gnu.org/g:f8eda60e12dabaf5e9501104781ef5eba334cff7 commit r15-6837-gf8eda60e12dabaf5e9501104781ef5eba334cff7 Author: Harald Anlauf Date: Sun Jan 12 19:26:35 2025 +0100 Fortran: implement F2018 intrinsic OUT_OF_RANGE [PR115788] Implementation of the Fortran 2018 standard intrinsic OUT_OF_RANGE, with the GNU Fortran extension to unsigned integers. Runtime code is fully inline expanded. PR fortran/115788 gcc/fortran/ChangeLog: * check.cc (gfc_check_out_of_range): Check arguments to intrinsic. * expr.cc (free_expr0): Fix a memleak with unsigned literals. * gfortran.h (enum gfc_isym_id): Define GFC_ISYM_OUT_OF_RANGE. * gfortran.texi: Add OUT_OF_RANGE to list of intrinsics supporting UNSIGNED. * intrinsic.cc (add_functions): Add Fortran prototype. Break some nearby lines with excessive length. * intrinsic.h (gfc_check_out_of_range): Add prototypes. * intrinsic.texi: Fortran documentation of OUT_OF_RANGE. * simplify.cc (gfc_simplify_out_of_range): Compile-time simplification of OUT_OF_RANGE. * trans-intrinsic.cc (gfc_conv_intrinsic_out_of_range): Generate inline expansion of runtime code for OUT_OF_RANGE. (gfc_conv_intrinsic_function): Use it. gcc/testsuite/ChangeLog: * gfortran.dg/ieee/out_of_range.f90: New test. * gfortran.dg/out_of_range_1.f90: New test. * gfortran.dg/out_of_range_2.f90: New test. * gfortran.dg/out_of_range_3.f90: New test. 
Diff: --- gcc/fortran/check.cc| 42 + gcc/fortran/expr.cc | 1 + gcc/fortran/gfortran.h | 1 + gcc/fortran/gfortran.texi | 7 +- gcc/fortran/intrinsic.cc| 28 +++- gcc/fortran/intrinsic.h | 2 + gcc/fortran/intrinsic.texi | 67 gcc/fortran/simplify.cc | 208 gcc/fortran/trans-intrinsic.cc | 196 ++ gcc/testsuite/gfortran.dg/ieee/out_of_range.f90 | 65 gcc/testsuite/gfortran.dg/out_of_range_1.f90| 91 +++ gcc/testsuite/gfortran.dg/out_of_range_2.f90| 115 + gcc/testsuite/gfortran.dg/out_of_range_3.f90| 25 +++ 13 files changed, 835 insertions(+), 13 deletions(-) diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc index e29ad3986110..35458643835c 100644 --- a/gcc/fortran/check.cc +++ b/gcc/fortran/check.cc @@ -4864,6 +4864,48 @@ gfc_check_null (gfc_expr *mold) } +bool +gfc_check_out_of_range (gfc_expr *x, gfc_expr *mold, gfc_expr *round) +{ + if (!int_or_real_or_unsigned_check (x, 0)) +return false; + + if (mold == NULL) +return false; + + if (!int_or_real_or_unsigned_check (mold, 1)) +return false; + + if (!scalar_check (mold, 1)) +return false; + + if (round) +{ + if (!type_check (round, 2, BT_LOGICAL)) + return false; + + if (!scalar_check (round, 2)) + return false; + + if (x->ts.type != BT_REAL + || (mold->ts.type != BT_INTEGER && mold->ts.type != BT_UNSIGNED)) + { + gfc_error ("%qs argument of %qs intrinsic at %L shall appear " +"only if %qs is of type REAL and %qs is of type " +"INTEGER or UNSIGNED", +gfc_current_intrinsic_arg[2]->name, +gfc_current_intrinsic, &round->where, +gfc_current_intrinsic_arg[0]->name, +gfc_current_intrinsic_arg[1]->name); + + return false; + } +} + + return true; +} + + bool gfc_check_pack (gfc_expr *array, gfc_expr *mask, gfc_expr *vector) { diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc index 0e40b2493a5c..7f3f6c52fb54 100644 --- a/gcc/fortran/expr.cc +++ b/gcc/fortran/expr.cc @@ -466,6 +466,7 @@ free_expr0 (gfc_expr *e) switch (e->ts.type) { case BT_INTEGER: + case BT_UNSIGNED: mpz_clear (e->value.integer); break; diff --git 
a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 6293d85778c0..70913e3312b2 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -626,6 +626,7 @@ enum gfc_isym_id GFC_ISYM_NULL, GFC_ISYM_NUM_IMAGES, GFC_ISYM_OR, + GFC_ISYM_OUT_OF_RANGE, GFC_ISYM_PACK, GFC_ISYM_PARITY, GFC_ISYM_PERROR, diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi index 116667245932..d3fe0935aa44 100644 --- a/gcc/fortran/gfortran.texi +++ b/gcc/fortran/gfortran.texi @@ -2830,6 +2830,7 @@ The following intrinsics take unsigned arguments: @item @code{MODULO}, @pxref{MODULO} @item @code{MVBITS}, @pxref{MVBITS} @item @code{NOT}, @pxref{NOT} +@item @code{OUT_OF_RANGE}, @pxref{OUT_OF_RANGE} @
[gcc r15-6838] c: UX improvements to 'too {few, many} arguments' errors (v5) [PR118112]
https://gcc.gnu.org/g:a236f70617213343f3075ee43e8d9f5882dca400 commit r15-6838-ga236f70617213343f3075ee43e8d9f5882dca400 Author: David Malcolm Date: Sun Jan 12 13:46:31 2025 -0500 c: UX improvements to 'too {few,many} arguments' errors (v5) [PR118112] Consider this case of a bad call to a callback function (perhaps due to C23 changing the meaning of () in function decls): struct p { int (*bar)(); }; void baz() { struct p q; q.bar(1); } Before this patch the C frontend emits: t.c: In function 'baz': t.c:7:5: error: too many arguments to function 'q.bar' 7 | q.bar(1); | ^ which doesn't give the user much help in terms of knowing what was expected, and where the relevant declaration is. With this patch the C frontend emits: t.c: In function 'baz': t.c:7:5: error: too many arguments to function 'q.bar'; expected 0, have 1 7 | q.bar(1); | ^ ~ t.c:2:15: note: declared here 2 | int (*bar)(); | ^~~ (showing the expected vs actual counts, the pertinent field decl, and underlining the first extraneous argument at the callsite) Similarly, the patch also updates the "too few arguments" case to also show expected vs actual counts. Doing so requires a tweak to the wording to say "at least" for the case of variadic fns where previously the C FE emitted e.g.: s.c: In function 'test': s.c:5:3: error: too few arguments to function 'callee' 5 | callee (); | ^~ s.c:1:6: note: declared here 1 | void callee (const char *, ...); | ^~ with this patch it emits: s.c: In function 'test': s.c:5:3: error: too few arguments to function 'callee'; expected at least 1, have 0 5 | callee (); | ^~ s.c:1:6: note: declared here 1 | void callee (const char *, ...); | ^~ gcc/c/ChangeLog: PR c/118112 * c-typeck.cc (inform_declaration): Add "function_expr" param and use it for cases where we couldn't show the function decl to show field decls for callbacks. (build_function_call_vec): Add missing auto_diagnostic_group. Update for new param of inform_declaration. (convert_arguments): Likewise. 
For the "too many arguments" case add the expected vs actual counts to the message, and if we have it, add the location_t of the first surplus param as a secondary location within the diagnostic. For the "too few arguments" case, determine the minimum number of arguments required and add the expected vs actual counts to the message, tweaking it to "at least" for variadic functions. gcc/testsuite/ChangeLog: PR c/118112 * gcc.dg/too-few-arguments.c: New test. * gcc.dg/too-many-arguments.c: New test. Signed-off-by: David Malcolm Diff: --- gcc/c/c-typeck.cc | 77 gcc/testsuite/gcc.dg/too-few-arguments.c | 28 +++ gcc/testsuite/gcc.dg/too-many-arguments.c | 83 +++ 3 files changed, 177 insertions(+), 11 deletions(-) diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index 6e40f7edf02a..cd9290160d7a 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -3737,14 +3737,30 @@ build_function_call (location_t loc, tree function, tree params) return ret; } -/* Give a note about the location of the declaration of DECL. */ +/* Give a note about the location of the declaration of DECL, + or, failing that, a pertinent declaration for FUNCTION_EXPR. */ static void -inform_declaration (tree decl) +inform_declaration (tree decl, tree function_expr) { if (decl && (TREE_CODE (decl) != FUNCTION_DECL || !DECL_IS_UNDECLARED_BUILTIN (decl))) inform (DECL_SOURCE_LOCATION (decl), "declared here"); + else if (function_expr) +switch (TREE_CODE (function_expr)) + { + default: + break; + case COMPONENT_REF: + /* Show the decl of the pertinent field (e.g. for callback + fields in a struct. */ + { + tree field_decl = TREE_OPERAND (function_expr, 1); + if (location_t loc = DECL_SOURCE_LOCATION (field_decl)) + inform (loc, "declared here"); + } + break; + } } /* C implementation of callback for use when checking param types. 
*/ @@ -3819,10 +3835,11 @@ build_function_call_vec (location_t loc, vec arg_loc, function); else if (DECL_P (function)) { + auto_diagnostic_group d; error_at (loc, "called object %qD is not a function or function pointer", funct
[gcc r15-6845] lto: Fix empty fcntl.h build error with MinGW.
https://gcc.gnu.org/g:89ebb88d1d73ea8f693f2195321b402c31186abe commit r15-6845-g89ebb88d1d73ea8f693f2195321b402c31186abe Author: Michal Jires Date: Mon Jan 13 01:58:41 2025 +0100 lto: Fix empty fcntl.h build error with MinGW. MSYS2+MinGW contains headers that do not define their expected contents. This fix checks that the fcntl function is actually defined. Bootstrapped/regtested on x86_64-linux. Committed as obvious. gcc/ChangeLog: * lockfile.cc (LOCKFILE_USE_FCNTL): New. (lockfile::lock_write): Use LOCKFILE_USE_FCNTL. (lockfile::try_lock_write): Use LOCKFILE_USE_FCNTL. (lockfile::lock_read): Use LOCKFILE_USE_FCNTL. (lockfile::unlock): Use LOCKFILE_USE_FCNTL. (lockfile::lockfile_supported): Use LOCKFILE_USE_FCNTL. Diff: --- gcc/lockfile.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/gcc/lockfile.cc b/gcc/lockfile.cc index b385c295851f..cecbb86491da 100644 --- a/gcc/lockfile.cc +++ b/gcc/lockfile.cc @@ -22,6 +22,10 @@ along with GCC; see the file COPYING3. If not see #include "system.h" #include "lockfile.h" +/* fcntl.h may exist without expected contents. */ +#if HAVE_FCNTL_H && HOST_HAS_F_SETLKW +#define LOCKFILE_USE_FCNTL 1 +#endif /* Unique write lock. No other lock can be held on this lockfile. Blocking call. 
*/ @@ -32,7 +36,7 @@ lockfile::lock_write () if (fd < 0) return -1; -#if HAVE_FCNTL_H +#ifdef LOCKFILE_USE_FCNTL struct flock s_flock; s_flock.l_whence = SEEK_SET; @@ -57,7 +61,7 @@ lockfile::try_lock_write () if (fd < 0) return -1; -#if HAVE_FCNTL_H +#ifdef LOCKFILE_USE_FCNTL struct flock s_flock; s_flock.l_whence = SEEK_SET; @@ -87,7 +91,7 @@ lockfile::lock_read () if (fd < 0) return -1; -#if HAVE_FCNTL_H +#ifdef LOCKFILE_USE_FCNTL struct flock s_flock; s_flock.l_whence = SEEK_SET; @@ -108,7 +112,7 @@ lockfile::unlock () { if (fd < 0) { -#if HAVE_FCNTL_H +#ifdef LOCKFILE_USE_FCNTL struct flock s_flock; s_flock.l_whence = SEEK_SET; @@ -128,7 +132,7 @@ lockfile::unlock () bool lockfile::lockfile_supported () { -#if HAVE_FCNTL_H +#ifdef LOCKFILE_USE_FCNTL return true; #else return false;
[gcc r15-6846] lto: Pass cache checksum by reference [PR118181]
https://gcc.gnu.org/g:9100be5741329dfe7bd49d6cf60be1771b9bb3ea commit r15-6846-g9100be5741329dfe7bd49d6cf60be1771b9bb3ea Author: Michal Jires Date: Mon Jan 13 02:49:58 2025 +0100 lto: Pass cache checksum by reference [PR118181] Bootstrapped/regtested on x86_64-linux. Committed as obvious. PR lto/118181 gcc/ChangeLog: * lto-ltrans-cache.cc (ltrans_file_cache::create_item): Pass checksum by reference. * lto-ltrans-cache.h: Likewise. Diff: --- gcc/lto-ltrans-cache.cc | 2 +- gcc/lto-ltrans-cache.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/lto-ltrans-cache.cc b/gcc/lto-ltrans-cache.cc index 22c0bffaed59..c57775fae851 100644 --- a/gcc/lto-ltrans-cache.cc +++ b/gcc/lto-ltrans-cache.cc @@ -309,7 +309,7 @@ ltrans_file_cache::save_cache () Must be called with creation_lock held to prevent data race. */ ltrans_file_cache::item* -ltrans_file_cache::create_item (checksum_t checksum) +ltrans_file_cache::create_item (const checksum_t& checksum) { size_t prefix_len = cache_prefix.size (); diff --git a/gcc/lto-ltrans-cache.h b/gcc/lto-ltrans-cache.h index b95f63c33357..5fef44bae538 100644 --- a/gcc/lto-ltrans-cache.h +++ b/gcc/lto-ltrans-cache.h @@ -108,7 +108,7 @@ private: New input/output files are chosen to not collide with other items. Must be called with creation_lock held to prevent data race. */ - item* create_item (checksum_t checksum); + item* create_item (const checksum_t& checksum); /* Prunes oldest unused cache items over limit. Must be called with deletion_lock held to prevent data race. */
[gcc(refs/users/aoliva/heads/testme)] [ifcombine] propagate signbit mask to XOR right-hand operand
https://gcc.gnu.org/g:7f21e67697013a0ae714c5198145c5ae029c942d commit 7f21e67697013a0ae714c5198145c5ae029c942d Author: Alexandre Oliva Date: Sun Jan 12 22:16:21 2025 -0300 [ifcombine] propagate signbit mask to XOR right-hand operand If a single-bit bitfield takes up the sign bit of a storage unit, comparing the corresponding bitfield between two objects loads the storage units, XORs them, converts the result to signed char, and compares it with zero: ((signed char)(a.<byte> ^ c.<byte>) >= 0). fold_truth_andor_for_ifcombine recognizes the compare with zero as a sign bit test, then it decomposes the XOR into an equality test. The problem is that, after this decomposition, which figures out the width of the accessed fields, we applied the sign bit mask to the left-hand operand of the compare, but failed to also apply it to the right-hand operand when both were taken from the same XOR. This patch fixes that. for gcc/ChangeLog PR tree-optimization/118409 * gimple-fold.cc (fold_truth_andor_for_ifcombine): Apply the signbit mask to the right-hand XOR operand too. for gcc/testsuite/ChangeLog PR tree-optimization/118409 * gcc.dg/field-merge-20.c: New. 
Diff: --- gcc/gimple-fold.cc| 20 +++ gcc/testsuite/gcc.dg/field-merge-20.c | 64 +++ 2 files changed, 84 insertions(+) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index a3987c4590ae..93ed8b3abb05 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -8270,6 +8270,16 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, ll_and_mask = sign; else ll_and_mask &= sign; + if (l_xor) + { + if (!lr_and_mask.get_precision ()) + lr_and_mask = sign; + else + lr_and_mask &= sign; + if (l_const.get_precision ()) + l_const &= wide_int::from (lr_and_mask, + l_const.get_precision (), UNSIGNED); + } } if (rsignbit) @@ -8279,6 +8289,16 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, rl_and_mask = sign; else rl_and_mask &= sign; + if (r_xor) + { + if (!rr_and_mask.get_precision ()) + rr_and_mask = sign; + else + rr_and_mask &= sign; + if (r_const.get_precision ()) + r_const &= wide_int::from (rr_and_mask, + r_const.get_precision (), UNSIGNED); + } } /* If either comparison code is not correct for our logical operation, diff --git a/gcc/testsuite/gcc.dg/field-merge-20.c b/gcc/testsuite/gcc.dg/field-merge-20.c new file mode 100644 index ..44ac7fae50dc --- /dev/null +++ b/gcc/testsuite/gcc.dg/field-merge-20.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-options "-O1" } */ + +/* tree-optimization/118409 */ + +/* Check that tests involving a sign bit of a storage unit are handled + correctly. The compares are turned into xor tests by earlier passes, and + ifcombine has to propagate the sign bit mask to the right hand of the + compare extracted from the xor, otherwise we'll retain unwanted bits for the + compare. 
*/ + +typedef struct { +int p : __CHAR_BIT__; +int d : 1; +int b : __CHAR_BIT__ - 2; +int e : 1; +} g; + +g a = {.d = 1, .e = 1}, c = {.b = 1, .d = 1, .e = 1}; + +__attribute__((noipa)) +int f1 () +{ + if (a.d == c.d + && a.e == c.e) +return 0; + return -1; +} + +__attribute__((noipa)) +int f2 () +{ + if (a.d != c.d + || a.e != c.e) +return -1; + return 0; +} + +__attribute__((noipa)) +int f3 () +{ + if (c.d == a.d + && c.e == a.e) +return 0; + return -1; +} + +__attribute__((noipa)) +int f4 () +{ + if (c.d != a.d + || c.e != a.e) +return -1; + return 0; +} + +int main() { + if (f1 () < 0 + || f2 () < 0 + || f3 () < 0 + || f4 () < 0) +__builtin_abort(); + return 0; +}
[gcc/aoliva/heads/testme] [ifcombine] propagate signbit mask to XOR right-hand operan
The branch 'aoliva/heads/testme' was updated to point to: 7f21e6769701... [ifcombine] propagate signbit mask to XOR right-hand operan It previously pointed to: cc2aaa9ac0d3... [ifcombine] propagate signbit mask to xor right-hand operan Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- cc2aaa9... [ifcombine] propagate signbit mask to xor right-hand operan Summary of changes (added commits): --- 7f21e67... [ifcombine] propagate signbit mask to XOR right-hand operan
[gcc/aoliva/heads/testme] (167 commits) [ifcombine] propagate signbit mask to xor right-hand operan
The branch 'aoliva/heads/testme' was updated to point to: cc2aaa9ac0d3... [ifcombine] propagate signbit mask to xor right-hand operan It previously pointed to: f419ad18d1ba... [ifcombine] drop other misuses of uniform_integer_cst_p Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- f419ad1... [ifcombine] drop other misuses of uniform_integer_cst_p 98ead44... [ifcombine] fix mask variable test to match use [PR118344] b88da51... [ifcombine] reuse left-hand mask to decode right-hand xor o 1a5cf10... [ifcombine] adjust for narrowing converts before shifts [PR 41dd4aa... testsuite: generalized field-merge tests for <32-bit int [P fe67e1e... testsuite: generalize ifcombine field-merge tests [PR118025 aa7a47f... ifcombine field-merge: improve handling of dwords d19ac11... [testsuite] rearrange requirements for dfp bitint run tests Summary of changes (added commits): --- cc2aaa9... [ifcombine] propagate signbit mask to xor right-hand operan 47ac6ca... [ifcombine] drop other misuses of uniform_integer_cst_p (*) fd4e979... [ifcombine] fix mask variable test to match use [PR118344] (*) 740c849... [ifcombine] reuse left-hand mask to decode right-hand xor o (*) c96a6c2... [ifcombine] adjust for narrowing converts before shifts [PR (*) d3c91b0... testsuite: generalized field-merge tests for <32-bit int [P (*) 261ffe6... testsuite: generalize ifcombine field-merge tests [PR118025 (*) 38401c5... ifcombine field-merge: improve handling of dwords (*) d019ab4... ipa-cp: Fold-convert values when necessary (PR 118138) (*) 86175a6... nvptx: Add '__builtin_frame_address(0)' test case (*) 91dec10... nvptx: Add '__builtin_stack_address()' test case (*) f447c3c... testsuite: arm: Use -std=c17 and effective-target arm_arch_ (*) 3ff216b... ada: Incorrect accessibilty level for library level subprog (*) c92f9f0... ada: Remove empty line. (*) c43a533... ada: Set syntactic node properties immediately when crating (*) 8c850dd... 
ada: Turn Is_Effective_Use_Clause from syntactic to semanti (*) 2b27522... ada: Reorder syntactic node fields to match the Ada RM gram (*) 38a13ea... c++: Fix up ICEs on constexpr inline asm strings in templat (*) 933f0c2... c++: Fix up modules handling of namespace scope structured (*) f5e488c... fortran: use_iso_fortran_env_module tweaks [PR118337] (*) bd28244... c++: improve some modules comments (*) 6fe3950... c++: modules, generic lambda, constexpr if (*) e8a5788... LoongArch: Opitmize the cost of vec_construct. (*) 979ca3b... Daily bump. (*) 2d0f345... RISC-V: testsuite: fix target selector for sync_char_short (*) 08b6e87... AArch64: Fix costing of emulated gathers/scatters [PR118188 (*) fab96de... [PR118017][LRA]: Don't inherit reg of non-uniform reg class (*) 3cae3a8... c++: be permissive about eh spec mismatch for op new (*) 424a9ac... testsuite: arm: Fix typo in gcc.target/arm/armv8_2-fp16-con (*) 8e41205... s390: Add testcase for just fixed PR118362 (*) 21571cd... c: Restore warning for incomplete structures declared in pa (*) 681934a... testsuite: arm: Use -Os in memset-inline-8* tests (*) 794f672... testsuite: arm: Verify asm per function for armv8_2-fp16-co (*) c6b5430... c, c++: preserve type name in conversion [PR116060] (*) 04f4ac9... testsuite: Require trampolines for gcc.dg/pr118325.c (*) 2f31819... s390: Fix s390_constant_via_vgbm_p() [PR118362] (*) ca79349... c++: ICE during requires-expr partial subst [PR118060] (*) 27d620d... c++: tf_partial and instantiate_template [PR117887] (*) 76d1061... c++: constexpr potentiality of CAST_EXPR [PR117925] (*) eeedc54... c++: relax ICE for unexpected trees during constexpr [PR117 (*) 57904dc... c++: current inst w/ indirect dependent bases [PR117993] (*) 40f0f6a... c++: template-id dependence wrt local static arg [PR117792] (*) 8231019... arm: [MVE intrinsics] Another fix for moves of tuples (PR t (*) 310c8a6... 'git mv gcc/testsuite/gcc.dg/{,torture/}crc-linux-3.c' (*) 3861d36... 
nvptx: PTX 'alloca' for '-mptx=7.3'+, '-march=sm_52'+ [PR65 (*) 1db025c... Avoid PHI node re-allocation in loop copying (*) 3b69427... ada: Fix missing detection of late equality operator return (*) f622acc... ada: Accept predefined multiply operator for fixed point in (*) d107140... Fortran: Cylce detection for non vtypes only. [PR118337] (*) 14879ba... ree: Skip extension on fixed register (*) 659b70b... ada: Error on Disable_Controlled aspect in Multiway_Trees (*) aa086b7... ada: Cleanup preanalysis of static expressions (part 3) (*) 2cbd440... match.pd: Avoid introducing UB in the a r<< (32-b) -> a r>> (*) c5e71d2... fortran: Accept "15" modules for compatibility [PR118337] (*) b37628e... i386: Remove not used model number for Diamond Rapids (*) 00b77db... RISC-V: Refine registered_functions list for rvv overloaded
[gcc/aoliva/heads/testbase] (166 commits) [ifcombine] drop other misuses of uniform_integer_cst_p
The branch 'aoliva/heads/testbase' was updated to point to: 47ac6ca9cb08... [ifcombine] drop other misuses of uniform_integer_cst_p It previously pointed to: 1b1a33f76879... expand: drop stack adjustments after barrier [PR118006] Diff: Summary of changes (added commits): --- 47ac6ca... [ifcombine] drop other misuses of uniform_integer_cst_p (*) fd4e979... [ifcombine] fix mask variable test to match use [PR118344] (*) 740c849... [ifcombine] reuse left-hand mask to decode right-hand xor o (*) c96a6c2... [ifcombine] adjust for narrowing converts before shifts [PR (*) d3c91b0... testsuite: generalized field-merge tests for <32-bit int [P (*) 261ffe6... testsuite: generalize ifcombine field-merge tests [PR118025 (*) 38401c5... ifcombine field-merge: improve handling of dwords (*) d019ab4... ipa-cp: Fold-convert values when necessary (PR 118138) (*) 86175a6... nvptx: Add '__builtin_frame_address(0)' test case (*) 91dec10... nvptx: Add '__builtin_stack_address()' test case (*) f447c3c... testsuite: arm: Use -std=c17 and effective-target arm_arch_ (*) 3ff216b... ada: Incorrect accessibilty level for library level subprog (*) c92f9f0... ada: Remove empty line. (*) c43a533... ada: Set syntactic node properties immediately when crating (*) 8c850dd... ada: Turn Is_Effective_Use_Clause from syntactic to semanti (*) 2b27522... ada: Reorder syntactic node fields to match the Ada RM gram (*) 38a13ea... c++: Fix up ICEs on constexpr inline asm strings in templat (*) 933f0c2... c++: Fix up modules handling of namespace scope structured (*) f5e488c... fortran: use_iso_fortran_env_module tweaks [PR118337] (*) bd28244... c++: improve some modules comments (*) 6fe3950... c++: modules, generic lambda, constexpr if (*) e8a5788... LoongArch: Opitmize the cost of vec_construct. (*) 979ca3b... Daily bump. (*) 2d0f345... RISC-V: testsuite: fix target selector for sync_char_short (*) 08b6e87... AArch64: Fix costing of emulated gathers/scatters [PR118188 (*) fab96de... 
[PR118017][LRA]: Don't inherit reg of non-uniform reg class (*) 3cae3a8... c++: be permissive about eh spec mismatch for op new (*) 424a9ac... testsuite: arm: Fix typo in gcc.target/arm/armv8_2-fp16-con (*) 8e41205... s390: Add testcase for just fixed PR118362 (*) 21571cd... c: Restore warning for incomplete structures declared in pa (*) 681934a... testsuite: arm: Use -Os in memset-inline-8* tests (*) 794f672... testsuite: arm: Verify asm per function for armv8_2-fp16-co (*) c6b5430... c, c++: preserve type name in conversion [PR116060] (*) 04f4ac9... testsuite: Require trampolines for gcc.dg/pr118325.c (*) 2f31819... s390: Fix s390_constant_via_vgbm_p() [PR118362] (*) ca79349... c++: ICE during requires-expr partial subst [PR118060] (*) 27d620d... c++: tf_partial and instantiate_template [PR117887] (*) 76d1061... c++: constexpr potentiality of CAST_EXPR [PR117925] (*) eeedc54... c++: relax ICE for unexpected trees during constexpr [PR117 (*) 57904dc... c++: current inst w/ indirect dependent bases [PR117993] (*) 40f0f6a... c++: template-id dependence wrt local static arg [PR117792] (*) 8231019... arm: [MVE intrinsics] Another fix for moves of tuples (PR t (*) 310c8a6... 'git mv gcc/testsuite/gcc.dg/{,torture/}crc-linux-3.c' (*) 3861d36... nvptx: PTX 'alloca' for '-mptx=7.3'+, '-march=sm_52'+ [PR65 (*) 1db025c... Avoid PHI node re-allocation in loop copying (*) 3b69427... ada: Fix missing detection of late equality operator return (*) f622acc... ada: Accept predefined multiply operator for fixed point in (*) d107140... Fortran: Cylce detection for non vtypes only. [PR118337] (*) 14879ba... ree: Skip extension on fixed register (*) 659b70b... ada: Error on Disable_Controlled aspect in Multiway_Trees (*) aa086b7... ada: Cleanup preanalysis of static expressions (part 3) (*) 2cbd440... match.pd: Avoid introducing UB in the a r<< (32-b) -> a r>> (*) c5e71d2... fortran: Accept "15" modules for compatibility [PR118337] (*) b37628e... 
i386: Remove not used model number for Diamond Rapids (*) 00b77db... RISC-V: Refine registered_functions list for rvv overloaded (*) 1bb367b... OpenMP: declare variant's append_args + dispatch interop fi (*) 5f61fb4... Daily bump. (*) b7f1686... nvptx: For '-march=sm_52' and higher, default at least to ' (*) ecb99f6... nvptx: Support '-mptx=7.3' (*) 975638b... nvptx: Add effective-target 'nvptx_softstack', use for effe (*) e5180fb... c++: Honor complain in cp_build_function_call_vec for check (*) dcbd260... nvptx: Clarify that the PTX "native" stack pointer is handl (*) 1823170... nvptx: Handle '__builtin_stack_save()' in a well-behaved wa (*) 2116e8d... nvptx: Add '__builtin_stack_save()', '__builtin_stack_resto (*) 678c3f0... nvptx: Add '__builtin_alloca(0)' test cases [PR65181] (*) 36eee5a... gcc/configure: Fix check for assembler section merging supp (*) c42261d... c++: d
[gcc(refs/users/aoliva/heads/testme)] [ifcombine] propagate signbit mask to xor right-hand operand
https://gcc.gnu.org/g:cc2aaa9ac0d31f8f4098c4276e0695afb7f63fcf commit cc2aaa9ac0d31f8f4098c4276e0695afb7f63fcf Author: Alexandre Oliva Date: Sun Jan 12 22:16:21 2025 -0300 [ifcombine] propagate signbit mask to xor right-hand operand Diff: --- gcc/gimple-fold.cc| 20 gcc/testsuite/gcc.dg/field-merge-20.c | 44 +++ 2 files changed, 64 insertions(+) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index a3987c4590ae..93ed8b3abb05 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -8270,6 +8270,16 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, ll_and_mask = sign; else ll_and_mask &= sign; + if (l_xor) + { + if (!lr_and_mask.get_precision ()) + lr_and_mask = sign; + else + lr_and_mask &= sign; + if (l_const.get_precision ()) + l_const &= wide_int::from (lr_and_mask, + l_const.get_precision (), UNSIGNED); + } } if (rsignbit) @@ -8279,6 +8289,16 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree truth_type, rl_and_mask = sign; else rl_and_mask &= sign; + if (r_xor) + { + if (!rr_and_mask.get_precision ()) + rr_and_mask = sign; + else + rr_and_mask &= sign; + if (r_const.get_precision ()) + r_const &= wide_int::from (rr_and_mask, + r_const.get_precision (), UNSIGNED); + } } /* If either comparison code is not correct for our logical operation, diff --git a/gcc/testsuite/gcc.dg/field-merge-20.c b/gcc/testsuite/gcc.dg/field-merge-20.c new file mode 100644 index ..3c1ec0cbd80f --- /dev/null +++ b/gcc/testsuite/gcc.dg/field-merge-20.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-options "-O1" } */ + +/* tree-optimization/118409 */ + +/* Check that tests involving a sign bit of a storage unit are handled + correctly. 
The compares are turned into xor tests by earlier passes, and ifcombine has to propagate the sign bit mask to the right hand of the compare extracted from the */ + +typedef struct { +int p : __CHAR_BIT__; +int d : 1; +int b : __CHAR_BIT__ - 2; +int e : 1; +int f; +} g; + +g a = {.d = 1, .e = 1}, c = {.b = 1, .d = 1, .e = 1}; + +__attribute__((noipa)) +int f1 () +{ + if (a.d == c.d + && a.e == c.e + && a.f == 0) +return 0; + return -1; +} + +__attribute__((noipa)) +int f2 () +{ + if (a.d != c.d + || a.e != c.e + || a.f != 0) +return -1; + return 0; +} + +int main() { + if (f1 () < 0 + || f2 () < 0) +__builtin_abort(); + return 0; +}
[gcc r15-6844] Refactor ix86_expand_vecop_qihi2.
https://gcc.gnu.org/g:0e05b793fba2a9bea9f0fbb1f068679f5dadf514 commit r15-6844-g0e05b793fba2a9bea9f0fbb1f068679f5dadf514 Author: liuhongt Date: Wed Jan 8 23:11:17 2025 -0800 Refactor ix86_expand_vecop_qihi2. Since using vpermq causes a regression and is already manually disabled by !TARGET_AVX512BW, remove the code related to vpermq and make ix86_expand_vecop_qihi2 only handle the vpmovbw + op + vpmovwb case. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_vecop_qihi2): Refactor to avoid redundant TARGET_AVX512BW in many places. Diff: --- gcc/config/i386/i386-expand.cc | 39 +-- 1 file changed, 5 insertions(+), 34 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 2ab57874234b..da030832bba7 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -24864,11 +24864,9 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2) generic permutation to merge the data back into the right place. This permutation results in VPERMQ, which is slow, so better fall back to ix86_expand_vecop_qihi. */ - if (!TARGET_AVX512BW) -return false; - - if ((qimode == V16QImode && !TARGET_AVX2) - || (qimode == V32QImode && (!TARGET_AVX512BW || !TARGET_EVEX512)) + if (!TARGET_AVX512BW + || (qimode == V16QImode && !TARGET_AVX512VL) + || (qimode == V32QImode && !TARGET_EVEX512) /* There are no V64HImode instructions. 
*/ || qimode == V64QImode) return false; @@ -24883,8 +24881,7 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2) { case E_V16QImode: himode = V16HImode; - if (TARGET_AVX512VL && TARGET_AVX512BW) - gen_truncate = gen_truncv16hiv16qi2; + gen_truncate = gen_truncv16hiv16qi2; break; case E_V32QImode: himode = V32HImode; @@ -24926,33 +24923,7 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2) hdest = expand_simple_binop (himode, code, hop1, hop2, NULL_RTX, 1, OPTAB_DIRECT); - if (gen_truncate) -emit_insn (gen_truncate (dest, hdest)); - else -{ - struct expand_vec_perm_d d; - rtx wqdest = gen_reg_rtx (wqimode); - rtx wqres = gen_lowpart (wqimode, hdest); - bool ok; - int i; - - /* Merge the data back into the right place. */ - d.target = wqdest; - d.op0 = d.op1 = wqres; - d.vmode = wqimode; - d.nelt = GET_MODE_NUNITS (wqimode); - d.one_operand_p = false; - d.testing_p = false; - - for (i = 0; i < d.nelt; ++i) - d.perm[i] = i * 2; - - ok = ix86_expand_vec_perm_const_1 (&d); - gcc_assert (ok); - - emit_move_insn (dest, gen_lowpart (qimode, wqdest)); -} - + emit_insn (gen_truncate (dest, hdest)); return true; }
[gcc r15-6843] [PATCH] crc: Fix up some crc related wrong code issues [PR117997, PR118415]
https://gcc.gnu.org/g:9c387a99a911724546abe99ecd39bfc968ed6333 commit r15-6843-g9c387a99a911724546abe99ecd39bfc968ed6333 Author: Jakub Jelinek Date: Sun Jan 12 17:24:53 2025 -0700 [PATCH] crc: Fix up some crc related wrong code issues [PR117997, PR118415] Hi! As mentioned in the second PR, using table names like crc_table_for_crc_8_polynomial_0x12 in the user namespace is wrong; a user could have defined such variables in their code and as can be seen on the last testcase, then it just misbehaves. At minimum such names should start with 2 underscores, moving it into implementation namespace, and if possible have some dot or dollar in the name if target supports it. I think assemble_crc_table right now always emits tables as local variables, I really don't see what would be setting TREE_PUBLIC flag on IDENTIFIER_NODEs. It might be nice to share the tables between TUs in the same binary or shared library, but in that case it should have hidden visibility if possible, so that it isn't exported from the libraries or binaries, we don't want the optimization to affect set of exported symbols from libraries. And, as can be seen in the first PR, building gen_rtx_SYMBOL_REF by hand is certainly unexpected on some targets, e.g. those which use -fsection-anchors, so we should instead use DECL_RTL of the VAR_DECL. For that we'd need to look it up if we haven't emitted it already, while IDENTIFIER_NODEs can be looked up easily, I guess for the VAR_DECLs we'd need custom hash table. Now, all of the above (except sharing between multiple TUs) is already implemented in output_constant_def, so I think it is much better to just use that function. And, if we want to share it between multiple TUs, we could extend the SHF_MERGE usage in gcc, currently we only use it for constant pool entries with same size as alignment, from 1 to 32 bytes, using .rodata.cstN sections. We could just use say .rodata.cstM.N sections where M would be alignment and N would be the entity size. 
We could use that for all constant pool entries say up to 2048 bytes. Though, as the current code doesn't share between multiple TUs, I think it can be done incrementally (either still for GCC 15, or GCC 16+). Bootstrapped/regtested on {x86_64,i686,aarch64,powerpc64le,s390x}-linux, on aarch64 it also fixes -FAIL: crypto/rsa -FAIL: hash ok for trunk? gcc/ PR tree-optimization/117997 PR middle-end/118415 * expr.cc (assemble_crc_table): Make static, remove id argument, use output_constant_def. Emit note if -fdump-rtl-expand-details about which table has been emitted. (generate_crc_table): Make static, adjust assemble_crc_table caller, call it always. (calculate_table_based_CRC): Make static. * internal-fn.cc (expand_crc_optab_fn): Emit note if -fdump-rtl-expand-details about using optab for crc. Formatting fix. gcc/testsuite/ * gcc.dg/crc-builtin-target32.c: Add -fdump-rtl-expand-details as dg-additional-options. Scan expand dump rather than assembly, adjust the regexps. * gcc.dg/crc-builtin-target64.c: Likewise. * gcc.dg/crc-builtin-rev-target32.c: Likewise. * gcc.dg/crc-builtin-rev-target64.c: Likewise. * gcc.dg/pr117997.c: New test. * gcc.dg/pr118415.c: New test. Diff: --- gcc/expr.cc | 56 gcc/internal-fn.cc | 12 ++- gcc/testsuite/gcc.dg/crc-builtin-rev-target32.c | 7 +- gcc/testsuite/gcc.dg/crc-builtin-rev-target64.c | 7 +- gcc/testsuite/gcc.dg/crc-builtin-target32.c | 7 +- gcc/testsuite/gcc.dg/crc-builtin-target64.c | 8 +- gcc/testsuite/gcc.dg/pr117997.c | 112 gcc/testsuite/gcc.dg/pr118415.c | 25 ++ 8 files changed, 181 insertions(+), 53 deletions(-) diff --git a/gcc/expr.cc b/gcc/expr.cc index 235e79546113..07fc85712e6b 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -14247,25 +14247,16 @@ calculate_crc (unsigned HOST_WIDE_INT crc, return crc; } -/* Assemble CRC table with 256 elements for the given POLYNOM and CRC_BITS with - given ID. - ID is the identifier of the table, the name of the table is unique, - contains CRC size and the polynomial. 
+/* Assemble CRC table with 256 elements for the given POLYNOM and CRC_BITS. POLYNOM is the polynomial used to calculate the CRC table's elements. CRC_BITS is the size of CRC, may be 8, 16, ... . */ -rtx -assemble_crc_table (tree id, unsigned HOST_WIDE_INT polynom, - unsigned short crc_bits) +static rtx +assemble_crc_table (unsigned HOST_WIDE_INT polynom, unsigned short crc_bits) { unsigned tabl
[gcc r13-9309] Zen5 tuning part 5: update instruction latencies in x86-tune-costs
https://gcc.gnu.org/g:f10d381dfc983ea32e5f72faadc7eb8126f114f6 commit r13-9309-gf10d381dfc983ea32e5f72faadc7eb8126f114f6 Author: Jan Hubicka Date: Wed Sep 4 09:19:08 2024 +0200 Zen5 tuning part 5: update instruction latencies in x86-tune-costs there is nothing exciting in this patch. I measured latencies and also compared them with newly released optimization guide. There are no dramatic changes compared to zen4. One interesting new bit is that addss is faster and can be 2 cycles when fed by another addss. I also increased the large insn bound since decoders seems no longer require instructions to be 8 bytes or less. gcc/ChangeLog: * config/i386/x86-tune-costs.h (znver5_cost): Update instruction costs. (cherry picked from commit 4292297a0f938ffc953422fa246ff00fe345fe3d) Diff: --- gcc/config/i386/x86-tune-costs.h | 28 +--- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index b89ac640ea5f..9edc6e36557d 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -2034,6 +2034,7 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (1), /* cost of a lea instruction. */ COSTS_N_INSNS (1), /* variable shift costs. */ COSTS_N_INSNS (1), /* constant shift costs. */ + /* mul has latency 3, executes in 3 integer units. */ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ COSTS_N_INSNS (3), /* HI. */ COSTS_N_INSNS (3), /* SI. */ @@ -2041,6 +2042,8 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (3)}, /* other. */ 0, /* cost of multiply per each bit set. */ + /* integer divide has latency of 8 cycles + plus 1 for every 9 bits of quotient. */ {COSTS_N_INSNS (10), /* cost of a divide/mod for QI. */ COSTS_N_INSNS (11), /* HI. */ COSTS_N_INSNS (13), /* SI. */ @@ -2048,7 +2051,7 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (16)},/* other. */ COSTS_N_INSNS (1), /* cost of movsx. */ COSTS_N_INSNS (1), /* cost of movzx. */ - 8, /* "large" insn. 
*/ + 15, /* "large" insn. */ 9, /* MOVE_RATIO. */ 6, /* CLEAR_RATIO */ {6, 6, 6}, /* cost of loading integer registers @@ -2065,12 +2068,13 @@ struct processor_costs znver5_cost = { 2, 2, 2, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ - /* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops, - throughput 5. Approx 7 uops do not depend on vector size and every load - is 5 uops. */ + + /* TODO: gather and scatter instructions are currently disabled in + x86-tune.def. In some cases they are however a win, see PR116582 + We however need good cost model for them. */ 14, 10, /* Gather load static, per_elt. */ 14, 20, /* Gather store static, per_elt. */ - 32, /* size of l1 cache. */ + 48, /* size of l1 cache. */ 1024,/* size of l2 cache. */ 64, /* size of prefetch block. */ /* New AMD processors never drop prefetches; if they cannot be performed @@ -2080,6 +2084,8 @@ struct processor_costs znver5_cost = { time). */ 100, /* number of parallel prefetches. */ 3, /* Branch cost. */ + /* TODO x87 latencies are still based on znver4. + Probably not very important these days. */ COSTS_N_INSNS (7), /* cost of FADD and FSUB insns. */ COSTS_N_INSNS (7), /* cost of FMUL instruction. */ /* Latency of fdiv is 8-15. */ @@ -2089,16 +2095,24 @@ struct processor_costs znver5_cost = { /* Latency of fsqrt is 4-10. */ COSTS_N_INSNS (25), /* cost of FSQRT instruction. */ + /* SSE instructions have typical throughput 4 and latency 1. */ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ - COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. *
[gcc r15-6840] Dump all symbol attributes in show_attr.
https://gcc.gnu.org/g:f4fa0b7d493a4ba217d989d3df75bbe3730874fc commit r15-6840-gf4fa0b7d493a4ba217d989d3df75bbe3730874fc Author: Thomas Koenig Date: Sun Jan 12 23:02:34 2025 +0100 Dump all symbol attributes in show_attr. gcc/fortran/ChangeLog: * dump-parse-tree.cc (show_attr): Dump all symbol attributes. Diff: --- gcc/fortran/dump-parse-tree.cc | 108 - 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc index 8d31ddfcffb3..97cab3f85f92 100644 --- a/gcc/fortran/dump-parse-tree.cc +++ b/gcc/fortran/dump-parse-tree.cc @@ -835,6 +835,8 @@ show_attr (symbol_attribute *attr, const char * module) fputs (" VOLATILE", dumpfile); if (attr->threadprivate) fputs (" THREADPRIVATE", dumpfile); + if (attr->temporary) +fputs (" TEMPORARY", dumpfile); if (attr->target) fputs (" TARGET", dumpfile); if (attr->dummy) @@ -868,6 +870,8 @@ show_attr (symbol_attribute *attr, const char * module) fputs (" IN-NAMELIST", dumpfile); if (attr->in_common) fputs (" IN-COMMON", dumpfile); + if (attr->in_equivalence) +fputs (" IN_EQUIVALENDE", dumpfile); if (attr->abstract) fputs (" ABSTRACT", dumpfile); @@ -926,6 +930,47 @@ show_attr (symbol_attribute *attr, const char * module) fputs (" OMP-DECLARE-TARGET-LINK", dumpfile); if (attr->omp_declare_target_indirect) fputs (" OMP-DECLARE-TARGET-INDIRECT", dumpfile); + if (attr->omp_device_type == OMP_DEVICE_TYPE_HOST) +fputs (" OMP-DEVICE-TYPE-HOST", dumpfile); + if (attr->omp_device_type == OMP_DEVICE_TYPE_NOHOST) +fputs (" OMP-DEVICE-TYPE-NOHOST", dumpfile); + if (attr->omp_device_type == OMP_DEVICE_TYPE_ANY) +fputs (" OMP-DEVICE-TYPE-ANY", dumpfile); + if (attr->omp_allocate) +fputs (" OMP-ALLOCATE", dumpfile); + + if (attr->oacc_declare_create) +fputs (" OACC-DECLARE-CREATE", dumpfile); + if (attr->oacc_declare_copyin) +fputs (" OACC-DECLARE-COPYIN", dumpfile); + if (attr->oacc_declare_deviceptr) +fputs (" OACC-DECLARE-DEVICEPTR", dumpfile); + if 
(attr->oacc_declare_device_resident) +fputs (" OACC-DECLARE-DEVICE-RESIDENT", dumpfile); + + switch (attr->oacc_routine_lop) +{ +case OACC_ROUTINE_LOP_NONE: +case OACC_ROUTINE_LOP_ERROR: + break; + +case OACC_ROUTINE_LOP_GANG: + fputs (" OACC-ROUTINE-LOP-GANG", dumpfile); + break; + +case OACC_ROUTINE_LOP_WORKER: + fputs (" OACC-ROUTINE-LOP-WORKER", dumpfile); + break; + +case OACC_ROUTINE_LOP_VECTOR: + fputs (" OACC-ROUTINE-LOP-VECTOR", dumpfile); + break; + +case OACC_ROUTINE_LOP_SEQ: + fputs (" OACC-ROUTINE-LOP-SEQ", dumpfile); + break; + } + if (attr->elemental) fputs (" ELEMENTAL", dumpfile); if (attr->pure) @@ -956,8 +1001,69 @@ show_attr (symbol_attribute *attr, const char * module) fputs (" IS-MAIN-PROGRAM", dumpfile); if (attr->oacc_routine_nohost) fputs (" OACC-ROUTINE-NOHOST", dumpfile); + if (attr->temporary) +fputs (" TEMPORARY", dumpfile); + if (attr->assign) +fputs (" ASSIGN", dumpfile); + if (attr->not_always_present) +fputs (" NOT-ALWAYS-PRESENT", dumpfile); + if (attr->implied_index) +fputs (" IMPLIED-INDEX", dumpfile); + if (attr->proc_pointer) +fputs (" PROC-POINTER", dumpfile); + if (attr->fe_temp) +fputs (" FE-TEMP", dumpfile); + if (attr->automatic) +fputs (" AUTOMATIC", dumpfile); + if (attr->class_pointer) +fputs (" CLASS-POINTER", dumpfile); + if (attr->save == SAVE_EXPLICIT) +fputs (" SAVE-EXPLICIT", dumpfile); + if (attr->save == SAVE_IMPLICIT) +fputs (" SAVE-IMPLICIT", dumpfile); + if (attr->used_in_submodule) +fputs (" USED-IN-SUBMODULE", dumpfile); + if (attr->use_only) +fputs (" USE-ONLY", dumpfile); + if (attr->use_rename) +fputs (" USE-RENAME", dumpfile); + if (attr->imported) +fputs (" IMPORTED", dumpfile); + if (attr->host_assoc) +fputs (" HOST-ASSOC", dumpfile); + if (attr->generic) +fputs (" GENERIC", dumpfile); + if (attr->generic_copy) +fputs (" GENERIC-COPY", dumpfile); + if (attr->untyped) +fputs (" UNTYPED", dumpfile); + if (attr->extension) +fprintf (dumpfile, " EXTENSION(%u)", attr->extension); + if (attr->is_class) 
+fputs (" IS-CLASS", dumpfile); + if (attr->class_ok) +fputs (" CLASS-OK", dumpfile); + if (attr->vtab) +fputs (" VTAB", dumpfile); + if (attr->vtype) +fputs (" VTYPE", dumpfile); + if (attr->module_procedure) +fputs (" MODULE-PROCEDURE", dumpfile); + if (attr->if_source == IFSRC_DECL) +fputs (" IFSRC-DECL", dumpfile); + if (attr->if_source == IFSRC_IFBODY) +fputs (" IFSRC-IFBODY", dumpfile); + + for (int i = 0; i < EXT_ATTR_LAST; i++) +{ + if (attr->ext_attr & (1 << i)) + { + fputs (" ATTRIBUTE-", dumpfile); + for (const char *p = ext_attr_list[i].name; p && *p; p++) +