[gcc r15-5362] ipa-modref bits for unsequenced and reproducible
https://gcc.gnu.org/g:addf02282026cc23d24fc4445b47b408d484d1e7 commit r15-5362-gaddf02282026cc23d24fc4445b47b408d484d1e7 Author: Jan Hubicka Date: Sun Nov 17 11:54:10 2024 +0100 ipa-modref bits for unsequenced and reproducible C attributes reproducible and unsequenced imply that calling a function twice leads to the same effect if parameters are otherwise unchanged (function call itself does not count). This is a bit stronger than modref's notion of nondeterminism that says that same inputs will yield same outputs (function call itself does count). This patch makes reproducible/unsequenced imply determinism and cleans up determinism handling. By itself it is not useful, since we can not make use of it unless we know what are the inputs/outputs of the function which I plan to handle by the "fn spec" attribute. gcc/ChangeLog: * ipa-modref.cc (modref_summary::useful_p): const/pure implies determinism. (modref_summary_lto::useful_p): Likewise. (ignore_nondeterminism_p): Add CALLEE_FNTYPE parameter; check for reproducible/unsequenced. (modref_access_analysis::record_access_p): Use ignore_nondeterminism_p when handling volatile accesses. (modref_access_analysis::get_access_for_fnspec): Update. (modref_access_analysis::process_fnspec): Cleanup handling of NOVOPS. (modref_access_analysis::analyze_call): Use ignore_nondeterminism_p when handling asm statements. (modref_access_analysis::analyze_stmt): Update. (propagate_unknown_call): Update. (modref_propagate_in_scc): Update. (ipa_merge_modref_summary_after_inlining): Update. 
Diff: --- gcc/ipa-modref.cc | 90 --- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc index 12ac0e7865a7..08a7740de943 100644 --- a/gcc/ipa-modref.cc +++ b/gcc/ipa-modref.cc @@ -336,15 +336,13 @@ modref_summary::useful_p (int ecf_flags, bool check_flags) && remove_useless_eaf_flags (static_chain_flags, ecf_flags, false)) return true; if (ecf_flags & ECF_CONST) -return ((!side_effects || !nondeterministic) - && (ecf_flags & ECF_LOOPING_CONST_OR_PURE)); +return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE)); if (loads && !loads->every_base) return true; else kills.release (); if (ecf_flags & ECF_PURE) -return ((!side_effects || !nondeterministic) - && (ecf_flags & ECF_LOOPING_CONST_OR_PURE)); +return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE)); return stores && !stores->every_base; } @@ -409,15 +407,13 @@ modref_summary_lto::useful_p (int ecf_flags, bool check_flags) && remove_useless_eaf_flags (static_chain_flags, ecf_flags, false)) return true; if (ecf_flags & (ECF_CONST | ECF_NOVOPS)) -return ((!side_effects || !nondeterministic) - && (ecf_flags & ECF_LOOPING_CONST_OR_PURE)); +return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE)); if (loads && !loads->every_base) return true; else kills.release (); if (ecf_flags & ECF_PURE) -return ((!side_effects || !nondeterministic) - && (ecf_flags & ECF_LOOPING_CONST_OR_PURE)); +return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE)); return stores && !stores->every_base; } @@ -794,13 +790,25 @@ namespace { /* Return true if ECF flags says that nondeterminism can be ignored. 
*/ static bool -ignore_nondeterminism_p (tree caller, int flags) +ignore_nondeterminism_p (tree caller, int flags, tree callee_fntype) { - if (flags & (ECF_CONST | ECF_PURE)) + int caller_flags = flags_from_decl_or_type (caller); + if ((flags | caller_flags) & (ECF_CONST | ECF_PURE)) return true; if ((flags & (ECF_NORETURN | ECF_NOTHROW)) == (ECF_NORETURN | ECF_NOTHROW) || (!opt_for_fn (caller, flag_exceptions) && (flags & ECF_NORETURN))) return true; + /* C language defines unsequenced and reproducible functions + to be deterministic. */ + if (lookup_attribute ("unsequenced", TYPE_ATTRIBUTES (TREE_TYPE (caller))) + || lookup_attribute ("reproducible", + TYPE_ATTRIBUTES (TREE_TYPE (caller +return true; + if (callee_fntype + && (lookup_attribute ("unsequenced", TYPE_ATTRIBUTES (callee_fntype)) + || lookup_attribute ("reproducible", + TYPE_ATTRIBUTES (callee_fntype +return true; return false; } @@ -1151,7 +1159,8 @@ modref_access_analysis::record_access_lto (modref_records_lto *tt, ao_ref *ref, bool modref_access_analysis::record_access_p (tree expr) { - if (TREE_THIS_VOLATILE (expr)) + if (TREE_THIS_VOLATILE (expr) + && !ignore_nondeterminism_p (current_function_decl, 0, NULL)) { if
[gcc(refs/users/meissner/heads/work186-sha)] Merge commit 'refs/users/meissner/heads/work186-sha' of git+ssh://gcc.gnu.org/git/gcc into me/work18
https://gcc.gnu.org/g:62082e9dacf703d47b7735d7a3b7d19bb23ef800 commit 62082e9dacf703d47b7735d7a3b7d19bb23ef800 Merge: 234501f7cd56 bd08a23488c8 Author: Michael Meissner Date: Sat Nov 16 19:23:26 2024 -0500 Merge commit 'refs/users/meissner/heads/work186-sha' of git+ssh://gcc.gnu.org/git/gcc into me/work186-sha Diff:
[gcc r15-5346] doc: Streamline hppa*-hp-hpux11 installation instructions
https://gcc.gnu.org/g:4a4bd60fa08b9e1079ebead6cb8c3ce82c7f9ef6 commit r15-5346-g4a4bd60fa08b9e1079ebead6cb8c3ce82c7f9ef6 Author: Gerald Pfeifer Date: Sat Nov 16 16:46:50 2024 +0100 doc: Streamline hppa*-hp-hpux11 installation instructions A HP/UX linker patch from the GCC 3.3 era and Binutils 2.14 no longer should require special mention. These originally came in via commit c51244972206 in April 2004 as * doc/install.texi: Update HP-UX 11 installation procedure. gcc: PR target/69374 * doc/install.texi (Specific) : Remove references to HP/UX linker patch from 2004 and Binutils 2.14. Diff: --- gcc/doc/install.texi | 24 1 file changed, 24 deletions(-) diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 705440ffd330..fc74ee3d0e16 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -4222,30 +4222,6 @@ result, it's not possible to switch linkers in the middle of a GCC build. This has been reported to sometimes occur in unified builds of binutils and GCC@. -A recent linker patch must be installed for the correct operation of -GCC 3.3 and later. @code{PHSS_26559} and @code{PHSS_24304} are the -oldest linker patches that are known to work. They are for HP-UX -11.00 and 11.11, respectively. @code{PHSS_24303}, the companion to -@code{PHSS_24304}, might be usable but it hasn't been tested. These -patches have been superseded. Consult the HP patch database to obtain -the currently recommended linker patch for your system. - -The patches are necessary for the support of weak symbols on the -32-bit port, and for the running of initializers and finalizers. Weak -symbols are implemented using SOM secondary definition symbols. Prior -to HP-UX 11, there are bugs in the linker support for secondary symbols. -The patches correct a problem of linker core dumps creating shared -libraries containing secondary symbols, as well as various other -linking issues involving secondary symbols. 
- -GCC 3.3 uses the ELF DT_INIT_ARRAY and DT_FINI_ARRAY capabilities to -run initializers and finalizers on the 64-bit port. The 32-bit port -uses the linker @option{+init} and @option{+fini} options for the same -purpose. The patches correct various problems with the +init/+fini -options, including program core dumps. Binutils 2.14 corrects a -problem on the 64-bit port resulting from HP's non-standard use of -the .init and .fini sections for array initializers and finalizers. - Only the HP linker is supported for the @samp{hppa64-hp-hpux11*} target. At this time, the GNU linker does not support the creation of long
[gcc(refs/users/meissner/heads/work186-dmf)] RFC2653-PowerPC: Add support for 1, 024 bit DMR registers.
https://gcc.gnu.org/g:dab4366f82e7b5d911175bee56dd7b951cc766fe commit dab4366f82e7b5d911175bee56dd7b951cc766fe Author: Michael Meissner Date: Sat Nov 16 01:35:43 2024 -0500 RFC2653-PowerPC: Add support for 1,024 bit DMR registers. This patch is a preliminary patch to add the full 1,024 bit dense math register (DMRs) for -mcpu=future. The MMA 512-bit accumulators map onto the top of the DMR register. This patch only adds the new 1,024 bit register support. It does not add support for any instructions that need 1,024 bit registers instead of 512 bit registers. I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit registers. The 'wD' constraint added in previous patches is used for these registers. I added support to do load and store of DMRs via the VSX registers, since there are no load/store dense math instructions. I added the new keyword '__dmr' to create 1,024 bit types that can be loaded into DMRs. At present, I don't have aliases for __dmr512 and __dmr1024 that we've discussed internally. The patches have been tested on both little and big endian systems. Can I check it into the master branch? 2024-11-16 Michael Meissner gcc/ * config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec. (UNSPEC_DM_INSERT512_LOWER): Likewise. (UNSPEC_DM_EXTRACT512): Likewise. (UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise. (UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise. (movtdo): New define_expand and define_insn_and_split to implement 1,024 bit DMR registers. (movtdo_insert512_upper): New insn. (movtdo_insert512_lower): Likewise. (movtdo_extract512): Likewise. (reload_dmr_from_memory): Likewise. (reload_dmr_to_memory): Likewise. * config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR support. (rs6000_init_builtins): Add support for __dmr keyword. * config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add support for TDOmode. (rs6000_function_arg): Likewise. * config/rs6000/rs6000-modes.def (TDOmode): New mode. 
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add support for TDOmode. (rs6000_hard_regno_mode_ok_uncached): Likewise. (rs6000_hard_regno_mode_ok): Likewise. (rs6000_modes_tieable_p): Likewise. (rs6000_debug_reg_global): Likewise. (rs6000_setup_reg_addr_masks): Likewise. (rs6000_init_hard_regno_mode_ok): Add support for TDOmode. Setup reload hooks for DMR mode. (reg_offset_addressing_ok_p): Add support for TDOmode. (rs6000_emit_move): Likewise. (rs6000_secondary_reload_simple_move): Likewise. (rs6000_preferred_reload_class): Likewise. (rs6000_secondary_reload_class): Likewise. (rs6000_mangle_type): Add mangling for __dmr type. (rs6000_dmr_register_move_cost): Add support for TDOmode. (rs6000_split_multireg_move): Likewise. (rs6000_invalid_conversion): Likewise. * config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode. (enum rs6000_builtin_type_index): Add DMR type nodes. (dmr_type_node): Likewise. (ptr_dmr_type_node): Likewise. gcc/testsuite/ * gcc.target/powerpc/dm-1024bit.c: New test. Diff: --- gcc/config/rs6000/mma.md | 154 ++ gcc/config/rs6000/rs6000-builtin.cc | 17 +++ gcc/config/rs6000/rs6000-call.cc | 10 +- gcc/config/rs6000/rs6000-modes.def| 4 + gcc/config/rs6000/rs6000.cc | 101 - gcc/config/rs6000/rs6000.h| 6 +- gcc/testsuite/gcc.target/powerpc/dm-1024bit.c | 63 +++ 7 files changed, 321 insertions(+), 34 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 2e04eb653fa6..8461499e1c3d 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -92,6 +92,11 @@ UNSPEC_MMA_XXMFACC UNSPEC_MMA_XXMTACC UNSPEC_MMA_DMSETDMRZ + UNSPEC_DM_INSERT512_UPPER + UNSPEC_DM_INSERT512_LOWER + UNSPEC_DM_EXTRACT512 + UNSPEC_DMR_RELOAD_FROM_MEMORY + UNSPEC_DMR_RELOAD_TO_MEMORY ]) (define_c_enum "unspecv" @@ -793,3 +798,152 @@ } [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) + +;; TDOmode (__dmr keyword for 1,024 bit registers). 
+(define_expand "movtdo" + [(set (match_operand:TDO 0 "nonimmediate_operand") + (match_operand:TDO 1 "input_operand"))] + "TARGET_MMA_DENSE_MATH" +{ + rs6000_emit_move (operands[0], operands[1], TDOmode); + DONE; +}) + +(define_insn_and_split "*movtdo" + [(set (match_operand:TDO
[gcc(refs/users/meissner/heads/work186-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
https://gcc.gnu.org/g:2759b96d463e0bc7f66c2ac7a9c90f0a0a4873ff commit 2759b96d463e0bc7f66c2ac7a9c90f0a0a4873ff Author: Michael Meissner Date: Sun Nov 17 20:20:26 2024 -0500 PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode Previously GCC would zero extend a DImode GPR value to TImode by first zero extending the DImode value into a GPR TImode value, and then do a MTVSRDD to move this value to a VSX register. This patch does the move directly, since if the middle argument to MTVSRDD is 0, it does the zero extend. This patch also generates LXVRDX if the DImode value is in memory. Finally, if the DImode is already in a vector register, it does a XXSPLTIB and XXPERMDI to get the value into the bottom 64-bits of the register. I have built GCC with the patches in this patch set applied on both little and big endian PowerPC systems and there were no regressions. Can I apply this patch to GCC 15? 2024-11-17 Michael Meissner gcc/ PR target/108958 * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn. gcc/testsuite/ PR target/108958 * gcc.target/powerpc/pr108958.c: New test. Diff: --- gcc/config/rs6000/rs6000.md | 26 gcc/testsuite/gcc.target/powerpc/pr108958.c | 47 + 2 files changed, 73 insertions(+) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d266f93ff2e4..e4c52f937d33 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,6 +1026,32 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) +(define_insn_and_split "zero_extendditi2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa") + (zero_extend:TI +(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z")))] + "TARGET_DIRECT_MOVE_64BIT" + "@ + # + # + mtvsrdd %x0,0,%1 + lxvrdx %x0,%y1" + "&& reload_completed && int_reg_operand (operands[0], TImode)" + [(set (match_dup 2) + (match_dup 1)) + (set (match_dup 3) + (const_int 0))] +{ + int lo = BYTES_BIG_ENDIAN ? 
1 : 0; + int hi = 1 - lo; + int r = reg_or_subregno (operands[0]); + + operands[2] = gen_rtx_REG (DImode, r + lo); + operands[3] = gen_rtx_REG (DImode, r + hi); +} + [(set_attr "type" "*,load,mtvsr,vecload") + (set_attr "length" "8,8,*,*") + (set_attr "isa" "*,*,*,p10")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c new file mode 100644 index ..aa79dc880c8e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ + +void +arg_to_vsx (unsigned long long x, __uint128_t *p) +{ + /* mtvsrdd vsx,0,gpr. */ + __uint128_t y = x; + __asm__ (" # %x0" : "+wa" (y)); + *p = y; +} + +void +mem_to_vsx (unsigned long long *p, __uint128_t *q) +{ + /* lxrdx vsx,0,ptr. */ + __uint128_t y = *p; + __asm__ (" # %x0" : "+wa" (y)); + *q = y; +} + + +void +arg_to_gpr (unsigned long long x, __uint128_t *p) +{ + /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ + __uint128_t y = x; + __asm__ (" # %0" : "+r" (y)); + *p = y; +} + +void +mem_to_gpr (unsigned long long *p, __uint128_t *q) +{ + /* ld gpr1_lo,addr; li gpr1_hi,0. */ + __uint128_t y = *p; + __asm__ (" # %0" : "+r" (y)); + *q = y; +} + +/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */
[gcc r15-5377] testsuite: Fix pr101145inf*.c testcases [PR117494]
https://gcc.gnu.org/g:0dc389f21bfd4ee49d57bcfaa1d1936456c55e48 commit r15-5377-g0dc389f21bfd4ee49d57bcfaa1d1936456c55e48 Author: Andrew Pinski Date: Sun Nov 17 13:40:22 2024 -0800 testsuite: Fix pr101145inf*.c testcases [PR117494] Instead of doing a dg-run with a specific target check for linux. Use signal as the effective-target since this requires the use of ALARM signal to do the testing. Also use check_vect in the main and renames main to main1 to make sure we don't use the registers. Tested on x86_64-linux-gnu. PR testsuite/117494 gcc/testsuite/ChangeLog: * gcc.dg/vect/pr101145inf.c: Remove dg-do and replace with dg-require-effective-target of signal. * gcc.dg/vect/pr101145inf_1.c: Likewise. * gcc.dg/vect/pr101145inf.inc: Rename main to main1 and mark as noinline. Include tree-vect.h. Have main call check_vect and main1. Signed-off-by: Andrew Pinski Diff: --- gcc/testsuite/gcc.dg/vect/pr101145inf.c | 2 +- gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 9 - gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.c b/gcc/testsuite/gcc.dg/vect/pr101145inf.c index 3ad8c1a2dd7b..aa598875aa59 100644 --- a/gcc/testsuite/gcc.dg/vect/pr101145inf.c +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.c @@ -1,4 +1,4 @@ -/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ +/* { dg-require-effective-target signal } */ /* { dg-additional-options "-O3" } */ #include #include "pr101145inf.inc" diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc index 4aa3d0491878..eb855b9881ab 100644 --- a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc @@ -1,6 +1,7 @@ #include #include #include +#include "tree-vect.h" void test_finite (); void test_infinite (); @@ -10,7 +11,8 @@ void do_exit (int i) exit (0); } -int main(void) +__attribute__((noinline)) +int main1(void) { test_finite (); struct sigaction s; @@ 
-26,3 +28,8 @@ int main(void) return 1; } +int main(void) +{ + check_vect (); + return main1(); +} diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c index e3e9dd46d10a..0465788c3cca 100644 --- a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c @@ -1,4 +1,4 @@ -/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ +/* { dg-require-effective-target signal } */ /* { dg-additional-options "-O3" } */ #include #include "pr101145inf.inc"
[gcc r15-5375] Improve ext-dce's ability to eliminate more extensions
https://gcc.gnu.org/g:beec291225be9b5e7a60b6818cf80224c343811d commit r15-5375-gbeec291225be9b5e7a60b6818cf80224c343811d Author: Jeff Law Date: Sun Nov 17 16:44:09 2024 -0700 Improve ext-dce's ability to eliminate more extensions I was looking at a regression in ext-dce's behavior just before Cauldron. Essentially a bugfix in ext-dce ended up causing us to fail to eliminate some useless extensions. When we have a SUBREG object with SUBREG_PROMOTED_VAR* flags set, we generally have to be more conservative in how we process bit group liveness, making bits live that wouldn't obviously be live otherwise. That's not always necessary though. For example, if we're storing a promoted subreg into memory, we may not care about those extra live bits on this instance of the subreg object (remember subregs are not shared!). Essentially if the mode of the memory reference is not wider than the mode of the inner REG, then we can clear the promoted state which in turn may allow more extension elimination. So at the start of ext-dce we do a simple pass over the IL and remove promoted subreg state when it's obviously safe to do so (memory stores when the modes allow it). That prevents extra bits from being live and ultimately allows us to remove more useless extensions. The testcase is in theory generic, but many targets won't have an opportunity to optimize this case. So rather than build out a large inclusion/exclusion list, I've just made the test RISC-V specific. Bootstrapped and regression tested on aarch64, riscv64, s390x, etc in my tester. gcc/ * ext-dce.cc (maybe_clear_subreg_promoted_p): New function. (ext_dce_execute): Call it. gcc/testsuite * gcc.target/riscv/ext-dce-1.c: New test. 
Diff: --- gcc/ext-dce.cc | 35 gcc/testsuite/gcc.target/riscv/ext-dce-1.c | 44 ++ 2 files changed, 79 insertions(+) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index 0ece37726c7e..649d39fadf99 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -941,6 +941,38 @@ ext_dce_process_bb (basic_block bb) } } +/* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and + is usually helpful. However, in some cases setting the value when + it not strictly needed can cause this pass to miss optimizations. + + Specifically consider (set (mem) (subreg (reg))). If set in that + case it will cause more bit groups to be live for REG than would + be strictly necessary which in turn can inhibit extension removal. + + So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it + is obviously not needed. */ + +static void +maybe_clear_subreg_promoted_p (void) +{ + for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn)) +{ + if (!NONDEBUG_INSN_P (insn)) + continue; + + rtx set = single_set (insn); + if (!set) + continue; + + /* There may be other cases where we should clear, but for +now, this is the only known case where it causes problems. */ + if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set)) +&& GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set + SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0; +} +} + + /* We optimize away sign/zero extensions in this pass and replace them with SUBREGs indicating certain bits are don't cares. @@ -1077,6 +1109,9 @@ static bool ext_dce_rd_confluence_n (edge) { return true; } void ext_dce_execute (void) { + /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful + to this pass. Clear it for those cases. 
*/ + maybe_clear_subreg_promoted_p (); df_analyze (); ext_dce_init (); diff --git a/gcc/testsuite/gcc.target/riscv/ext-dce-1.c b/gcc/testsuite/gcc.target/riscv/ext-dce-1.c new file mode 100644 index ..295d956ef556 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/ext-dce-1.c @@ -0,0 +1,44 @@ +/* { dg-options "-O2 -fdump-rtl-ext_dce" } */ +typedef unsigned char __uint8_t; +typedef unsigned int __uint32_t; +typedef __uint8_t uint8_t; +typedef __uint32_t uint32_t; +static inline void +unaligned_write32le(uint8_t *buf, uint32_t num) +{ + buf[0] = num; + buf[1] = num >> 8; + buf[2] = num >> 16; + buf[3] = num >> 24; + return; +} +typedef struct { + uint32_t dict_size; +} lzma_options_lzma; +typedef void lzma_coder; +typedef struct lzma_next_coder_s lzma_next_coder; +struct lzma_next_coder_s { + lzma_coder *coder; +}; +struct lzma_coder_s { + uint8_t header[(1 + 4 + 8)]; +}; + +void +alone_encoder_init(lzma_next_coder *next, const lzma_options_lzma *options) +{ + uint32_t d = options->dict_size - 1; + d |= d >> 2; +#if 0 + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; +#endif + if (d != (4294967295U)) + ++d; + unaligned_write32le(((struct lzma_coder_s*)next->coder)->heade
[gcc(refs/users/meissner/heads/work186-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
https://gcc.gnu.org/g:0a1ae1fb1364e066499c888790c63b06872b622e commit 0a1ae1fb1364e066499c888790c63b06872b622e Author: Michael Meissner Date: Sun Nov 17 23:27:17 2024 -0500 PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode Previously GCC would zero extend a DImode GPR value to TImode by first zero extending the DImode value into a GPR TImode value, and then do a MTVSRDD to move this value to a VSX register. This patch does the move directly, since if the middle argument to MTVSRDD is 0, it does the zero extend. This patch also generates LXVRDX if the DImode value is in memory. Finally, if the DImode is already in a vector register, it does a XXSPLTIB and XXPERMDI to get the value into the bottom 64-bits of the register. I have built GCC with the patches in this patch set applied on both little and big endian PowerPC systems and there were no regressions. Can I apply this patch to GCC 15? 2024-11-17 Michael Meissner gcc/ PR target/108958 * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn. gcc/testsuite/ PR target/108958 * gcc.target/powerpc/pr108958.c: New test. 
Diff: --- gcc/config/rs6000/rs6000.md | 46 ++ gcc/testsuite/gcc.target/powerpc/pr108958.c | 73 + 2 files changed, 119 insertions(+) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d266f93ff2e4..bfb02b07ef4e 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,6 +1026,52 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) +(define_insn_and_split "zero_extendditi2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r,wa,&wa") + (zero_extend:TI +(match_operand:DI 1 "gpc_reg_operand" "rwa,r,wa")))] + "TARGET_P9_VECTOR && TARGET_POWERPC64" + "@ + # + mtvsrdd %x0,0,%1 + #" + "&& reload_completed + && (int_reg_operand (operands[0], TImode) + || vsx_register_operand (operands[1], DImode))" + [(set (match_dup 2) + (match_dup 3)) + (set (match_dup 4) + (match_dup 5))] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int r = reg_or_subregno (op0); + + if (int_reg_operand (op0, TImode)) +{ + int lo = BYTES_BIG_ENDIAN ? 1 : 0; + int hi = 1 - lo; + + operands[2] = gen_rtx_REG (DImode, r + lo); + operands[3] = op1; + operands[4] = gen_rtx_REG (DImode, r + hi); + operands[5] = const0_rtx; +} + else +{ + rtx op0_di = gen_rtx_REG (DImode, r); + rtx op0_v2di = gen_rtx_REG (V2DImode, r); + rtx lo = WORDS_BIG_ENDIAN ? op1 : op0_di; + rtx hi = WORDS_BIG_ENDIAN ? 
op0_di : op1; + + operands[2] = op0_v2di; + operands[3] = CONST0_RTX (V2DImode); + operands[4] = op0_v2di; + operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo); +} +} + [(set_attr "type" "*,mtvsr,vecperm") + (set_attr "length" "8,*,8")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c new file mode 100644 index ..52a969507cb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c @@ -0,0 +1,73 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ + +/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ + +union u { + double d; + unsigned long long u64; +}; + +void +gpr_to_vsx (unsigned long long x, __uint128_t *p) +{ + /* mtvsrdd vsx,0,gpr. */ + __uint128_t y = x; + __asm__ (" # %x0" : "+wa" (y)); + *p = y; +} + +void +vsx_to_vsx (double d, __uint128_t *p) +{ + unsigned long long x; + __uint128_t y; + union u u2; + + u2.d = d; + x = u2.u64; + + __asm__ (" # %x0" : "+wa" (x)); + + /* xxspltib and xxpermdi. */ + y = x; + __asm__ (" # %x0" : "+wa" (y)); + + *p = y; +} + +void +gpr_to_gpr (unsigned long long x, __uint128_t *p) +{ + /* mr and li. */ + __uint128_t y = x; + __asm__ (" # %0" : "+r" (y)); + *p = y; +} + +void +vsx_to_gpr (double d, __uint128_t *p) +{ + unsigned long long x; + __uint128_t y; + union u u2; + + u2.d = d; + x = u2.u64; + + __asm__ (" # %x0" : "+wa" (x)); + + /* mfvsrd and li. */ + y = x; + __asm__ (" # %0" : "+r" (y)); + + *p = y; +} + +/* { dg-final { scan-assembler-times {\mli\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mmfvsrd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M}1 } } */
[gcc(refs/users/meissner/heads/work186-bugs)] Revert changes
https://gcc.gnu.org/g:4b4bcc55e5a0f454ba5c559af58c0e224ddce3d8 commit 4b4bcc55e5a0f454ba5c559af58c0e224ddce3d8 Author: Michael Meissner Date: Sun Nov 17 23:39:05 2024 -0500 Revert changes Diff: --- gcc/config/rs6000/rs6000.md | 46 -- gcc/testsuite/gcc.target/powerpc/pr108958.c | 73 - 2 files changed, 119 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index bfb02b07ef4e..d266f93ff2e4 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,52 +1026,6 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) -(define_insn_and_split "zero_extendditi2" - [(set (match_operand:TI 0 "gpc_reg_operand" "=r,wa,&wa") - (zero_extend:TI -(match_operand:DI 1 "gpc_reg_operand" "rwa,r,wa")))] - "TARGET_P9_VECTOR && TARGET_POWERPC64" - "@ - # - mtvsrdd %x0,0,%1 - #" - "&& reload_completed - && (int_reg_operand (operands[0], TImode) - || vsx_register_operand (operands[1], DImode))" - [(set (match_dup 2) - (match_dup 3)) - (set (match_dup 4) - (match_dup 5))] -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - int r = reg_or_subregno (op0); - - if (int_reg_operand (op0, TImode)) -{ - int lo = BYTES_BIG_ENDIAN ? 1 : 0; - int hi = 1 - lo; - - operands[2] = gen_rtx_REG (DImode, r + lo); - operands[3] = op1; - operands[4] = gen_rtx_REG (DImode, r + hi); - operands[5] = const0_rtx; -} - else -{ - rtx op0_di = gen_rtx_REG (DImode, r); - rtx op0_v2di = gen_rtx_REG (V2DImode, r); - rtx lo = WORDS_BIG_ENDIAN ? op1 : op0_di; - rtx hi = WORDS_BIG_ENDIAN ? op0_di : op1; - - operands[2] = op0_v2di; - operands[3] = CONST0_RTX (V2DImode); - operands[4] = op0_v2di; - operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo); -} -} - [(set_attr "type" "*,mtvsr,vecperm") - (set_attr "length" "8,*,8")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c deleted file mode 100644 index 52a969507cb1.. 
--- a/gcc/testsuite/gcc.target/powerpc/pr108958.c +++ /dev/null @@ -1,73 +0,0 @@ -/* { dg-do compile } */ -/* { dg-require-effective-target int128 } */ -/* { dg-require-effective-target lp64 } */ -/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ - -/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ - -union u { - double d; - unsigned long long u64; -}; - -void -gpr_to_vsx (unsigned long long x, __uint128_t *p) -{ - /* mtvsrdd vsx,0,gpr. */ - __uint128_t y = x; - __asm__ (" # %x0" : "+wa" (y)); - *p = y; -} - -void -vsx_to_vsx (double d, __uint128_t *p) -{ - unsigned long long x; - __uint128_t y; - union u u2; - - u2.d = d; - x = u2.u64; - - __asm__ (" # %x0" : "+wa" (x)); - - /* xxspltib and xxpermdi. */ - y = x; - __asm__ (" # %x0" : "+wa" (y)); - - *p = y; -} - -void -gpr_to_gpr (unsigned long long x, __uint128_t *p) -{ - /* mr and li. */ - __uint128_t y = x; - __asm__ (" # %0" : "+r" (y)); - *p = y; -} - -void -vsx_to_gpr (double d, __uint128_t *p) -{ - unsigned long long x; - __uint128_t y; - union u u2; - - u2.d = d; - x = u2.u64; - - __asm__ (" # %x0" : "+wa" (x)); - - /* mfvsrd and li. */ - y = x; - __asm__ (" # %0" : "+r" (y)); - - *p = y; -} - -/* { dg-final { scan-assembler-times {\mli\M} 2 } } */ -/* { dg-final { scan-assembler-times {\mmfvsrd\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mxxpermdi\M}1 } } */
[gcc(refs/users/meissner/heads/work186-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
https://gcc.gnu.org/g:71cce6c77479b90ade0682fe7c9ec7498f00a4fd commit 71cce6c77479b90ade0682fe7c9ec7498f00a4fd Author: Michael Meissner Date: Mon Nov 18 00:04:34 2024 -0500 PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode Previously GCC would zero extend a DImode GPR value to TImode by first zero extending the DImode value into a GPR TImode value, and then do a MTVSRDD to move this value to a VSX register. This patch does the move directly, since if the middle argument to MTVSRDD is 0, it does the zero extend. This patch also generates LXVRDX if the DImode value is in memory. Finally, if the DImode is already in a vector register, it does a XXSPLTIB and XXPERMDI to get the value into the bottom 64-bits of the register. I have built GCC with the patches in this patch set applied on both little and big endian PowerPC systems and there were no regressions. Can I apply this patch to GCC 15? 2024-11-17 Michael Meissner gcc/ PR target/108958 * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn. gcc/testsuite/ PR target/108958 * gcc.target/powerpc/pr108958.c: New test. 
Diff: --- gcc/config/rs6000/rs6000.md | 46 + gcc/testsuite/gcc.target/powerpc/pr108958.c | 27 + 2 files changed, 73 insertions(+) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d266f93ff2e4..bfb02b07ef4e 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,6 +1026,52 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) +(define_insn_and_split "zero_extendditi2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r,wa,&wa") + (zero_extend:TI +(match_operand:DI 1 "gpc_reg_operand" "rwa,r,wa")))] + "TARGET_P9_VECTOR && TARGET_POWERPC64" + "@ + # + mtvsrdd %x0,0,%1 + #" + "&& reload_completed + && (int_reg_operand (operands[0], TImode) + || vsx_register_operand (operands[1], DImode))" + [(set (match_dup 2) + (match_dup 3)) + (set (match_dup 4) + (match_dup 5))] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int r = reg_or_subregno (op0); + + if (int_reg_operand (op0, TImode)) +{ + int lo = BYTES_BIG_ENDIAN ? 1 : 0; + int hi = 1 - lo; + + operands[2] = gen_rtx_REG (DImode, r + lo); + operands[3] = op1; + operands[4] = gen_rtx_REG (DImode, r + hi); + operands[5] = const0_rtx; +} + else +{ + rtx op0_di = gen_rtx_REG (DImode, r); + rtx op0_v2di = gen_rtx_REG (V2DImode, r); + rtx lo = WORDS_BIG_ENDIAN ? op1 : op0_di; + rtx hi = WORDS_BIG_ENDIAN ? 
op0_di : op1; + + operands[2] = op0_v2di; + operands[3] = CONST0_RTX (V2DImode); + operands[4] = op0_v2di; + operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo); +} +} + [(set_attr "type" "*,mtvsr,vecperm") + (set_attr "length" "8,*,8")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c new file mode 100644 index ..03eb58d069e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ + +/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ + +void +gpr_to_vsx (unsigned long long x, __uint128_t *p) +{ + /* mtvsrdd vsx,0,gpr. */ + __uint128_t y = x; + __asm__ (" # %x0" : "+wa" (y)); + *p = y; +} + +void +gpr_to_gpr (unsigned long long x, __uint128_t *p) +{ + /* mr and li. */ + __uint128_t y = x; + __asm__ (" # %0" : "+r" (y)); + *p = y; +} + +/* { dg-final { scan-assembler-times {\mli\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */
[gcc r15-5369] c: Implement -Wdeprecated-non-prototype
https://gcc.gnu.org/g:701d8e7e60b85809cae348c1e9edb3b0f4924325 commit r15-5369-g701d8e7e60b85809cae348c1e9edb3b0f4924325 Author: Florian Weimer Date: Sun Nov 17 19:42:33 2024 +0100 c: Implement -Wdeprecated-non-prototype This warning covers the C23 incompatibilities resulting from using () as parameter lists in function declarations. The warning name comes from Clang. The implementation is not perfect because GCC treats these two declarations as equivalent: void f (); void f (not_a_type); This is a bit confusing because they are clearly visually distinct. However, as of GCC 14, the second form is an error by default, so treating both the same as far as -Wdeprecated-non-prototype is concerned does not seem so bad from a user experience view. gcc/c-family/ PR c/95445 * c-opts.cc (c_common_post_options): Initialize warn_deprecated_non_prototype. * c.opt (Wdeprecated-non-prototype): New option. * c.opt.urls: Regenerate. gcc/c/ PR c/95445 * c-decl.cc (start_function): Warn about parameters after parameter-less declaration. * c-typeck.cc (build_function_call_vec): Pass fntype to convert_arguments. (convert_arguments): Change argument to fntype and compute typelist. Warn about parameter list mismatches on first parameter. gcc/ PR c/95445 * doc/invoke.texi: Document -Wdeprecated-non-prototype. gcc/testsuite/ PR c/95445 * gcc.dg/Wdeprecated-non-prototype-1.c: New test. * gcc.dg/Wdeprecated-non-prototype-2.c: New test. * gcc.dg/Wdeprecated-non-prototype-3.c: New test. * gcc.dg/Wdeprecated-non-prototype-4.c: New test. 
Diff: --- gcc/c-family/c-opts.cc | 3 +++ gcc/c-family/c.opt | 4 gcc/c-family/c.opt.urls| 3 +++ gcc/c/c-decl.cc| 18 +++ gcc/c/c-typeck.cc | 26 ++ gcc/doc/invoke.texi| 12 +- gcc/testsuite/gcc.dg/Wdeprecated-non-prototype-1.c | 24 gcc/testsuite/gcc.dg/Wdeprecated-non-prototype-2.c | 22 ++ gcc/testsuite/gcc.dg/Wdeprecated-non-prototype-3.c | 18 +++ gcc/testsuite/gcc.dg/Wdeprecated-non-prototype-4.c | 22 ++ 10 files changed, 147 insertions(+), 5 deletions(-) diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc index f66a0a0ca888..3a3464ccc3f2 100644 --- a/gcc/c-family/c-opts.cc +++ b/gcc/c-family/c-opts.cc @@ -1006,6 +1006,9 @@ c_common_post_options (const char **pfilename) = ((pedantic && !flag_isoc23 && warn_c11_c23_compat != 0) || warn_c11_c23_compat > 0); + if (warn_deprecated_non_prototype == -1) +warn_deprecated_non_prototype = warn_c11_c23_compat > 0; + /* -Wshift-negative-value is enabled by -Wextra in C99 and C++11 to C++17 modes. */ if (warn_shift_negative_value == -1) diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 8224c82bfdf8..220421accf47 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -668,6 +668,10 @@ Wdeprecated-literal-operator C++ ObjC++ Var(warn_deprecated_literal_operator) Warning Warn about deprecated space between "" and suffix in a user-defined literal operator. +Wdeprecated-non-prototype +C ObjC Var(warn_deprecated_non_prototype) Init(-1) Warning +Warn about calls with arguments to functions declared without parameters. + Wdesignated-init C ObjC Var(warn_designated_init) Init(1) Warning Warn about positional initialization of structs requiring designated initializers. 
diff --git a/gcc/c-family/c.opt.urls b/gcc/c-family/c.opt.urls index 04f8e2ee0087..91918c492047 100644 --- a/gcc/c-family/c.opt.urls +++ b/gcc/c-family/c.opt.urls @@ -307,6 +307,9 @@ UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wdeprecated-enum-float-conv Wdeprecated-literal-operator UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wdeprecated-literal-operator) +Wdeprecated-non-prototype +UrlSuffix(gcc/Warning-Options.html#index-Wdeprecated-non-prototype) + Wdesignated-init UrlSuffix(gcc/Warning-Options.html#index-Wdesignated-init) diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc index 7e65bd04b11e..1128d72ccb03 100644 --- a/gcc/c/c-decl.cc +++ b/gcc/c/c-decl.cc @@ -10750,6 +10750,24 @@ start_function (struct c_declspecs *declspecs, struct c_declarator *declarator, } } + /* Optionally warn about C23 compatibility. */ + if (warn_deprecated_non_prototype + && old_decl != NULL_TREE + && TREE_CODE (oldtype) == FUNCTION_TYPE + && !TYPE_ARG_TYPES (oldtype) + && !TYPE_NO_NAMED_ARGS_STDARG_P (oldtype) + && (TYPE_ARG_TYPES (newtype)
[gcc(refs/users/meissner/heads/work186-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
https://gcc.gnu.org/g:f6b4501037856557f5a8e10ce31f894bcd4d9320 commit f6b4501037856557f5a8e10ce31f894bcd4d9320 Author: Michael Meissner Date: Sun Nov 17 17:15:08 2024 -0500 PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode Previously GCC would zero extend a DImode GPR value to TImode by first zero extending the DImode value into a GPR TImode value, and then do a MTVSRDD to move this value to a VSX register. This patch does the move directly, since if the middle argument to MTVSRDD is 0, it does the zero extend. This patch also generates LXVRDX if the DImode value is in memory. Finally, if the DImode is already in a vector register, it does a XXSPLTIB and XXPERMDI to get the value into the bottom 64-bits of the register. I have built GCC with the patches in this patch set applied on both little and big endian PowerPC systems and there were no regressions. Can I apply this patch to GCC 15? 2024-11-17 Michael Meissner gcc/ * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn. gcc/testsuite/ * gcc.target/powerpc/pr108958.c: New test. 
Diff: --- gcc/config/rs6000/rs6000.md | 47 gcc/testsuite/gcc.target/powerpc/pr108958.c | 55 + 2 files changed, 102 insertions(+) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d266f93ff2e4..e3ac69430f39 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,6 +1026,53 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) +(define_insn_and_split "zero_extendditi2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa,?&wa") + (zero_extend:TI +(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z,wa")))] + "TARGET_DIRECT_MOVE_64BIT" + "@ + # + # + mtvsrdd %x0,0,%1 + lxvrdx %x0,%y1 + #" + "&& reload_completed + && (int_reg_operand (operands[0], TImode) + || (vsx_register_operand (operands[0], TImode) + && vsx_register_operand (operands[1], DImode)))" + [(set (match_dup 2) + (match_dup 3)) + (set (match_dup 4) + (match_dup 5))] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + if (int_reg_operand (op0, TImode)) +{ + operands[2] = gen_lowpart (DImode, op0); + operands[3] = op1; + operands[4] = gen_highpart (DImode, op0); + operands[5] = const0_rtx; +} + else +{ + int op0_r = reg_or_subregno (op0); + rtx op0_di = gen_rtx_REG (DImode, op0_r); + rtx op0_v2di = gen_rtx_REG (V2DImode, op0_r); + rtx lo = WORDS_BIG_ENDIAN ? op0_di : op1; + rtx hi = WORDS_BIG_ENDIAN ? 
op1 : op0_di; + + operands[2] = op0_v2di; + operands[3] = CONST0_RTX (V2DImode); + operands[4] = op0_v2di; + operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo); +} +} + [(set_attr "type" "*,load,mtvsr,vecload,vecperm") + (set_attr "length" "8,8,*,*,8") + (set_attr "isa" "*,*,*,p10,*")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c new file mode 100644 index ..948ba327591b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c @@ -0,0 +1,55 @@ +/* { dg-do compile { int128 && lp64 && power10_ok } } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ + +void +arg_to_vsx (unsigned long long x, __uint128_t *p) +{ + /* mtvsrdd vsx,0,gpr. */ + __uint128_t y = x; + __asm__ (" # %x0" : "+wa" (y)); + *p = y; +} + +void +mem_to_vsx (unsigned long long *p, __uint128_t *q) +{ + /* lxrdx vsx,0,ptr. */ + __uint128_t y = *p; + __asm__ (" # %x0" : "+wa" (y)); + *q = y; +} + + +void +vsx_to_vsx (double d, __uint128_t *p) +{ + /* xxspltib + xxpermdir. */ + unsigned long long ull = d; + __uint128_t x = ull; + __asm__ (" # %x0" : "+wa" (x)); + *p = x; +} + +void +arg_to_gpr (unsigned long long x, __uint128_t *p) +{ + /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ + __uint128_t y = x; + __asm__ (" # %0" : "+r" (y)); + *p = y; +} + +void +mem_to_gpr (unsigned long long *p, __uint128_t *q) +{ + /* ld gpr1_lo,addr; li gpr1_hi,0. */ + __uint128_t y = *p; + __asm__ (" # %0" : "+r" (y)); + *q = y; +} + +/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlxrdx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M}1 } } */
[gcc(refs/users/meissner/heads/work186-bugs)] Revert changes
https://gcc.gnu.org/g:316942ce507e26f073791fe351bb3486278f6614 commit 316942ce507e26f073791fe351bb3486278f6614 Author: Michael Meissner Date: Sun Nov 17 18:40:50 2024 -0500 Revert changes Diff: --- gcc/config/rs6000/rs6000.md | 47 --- gcc/testsuite/gcc.target/powerpc/pr108958.c | 58 - 2 files changed, 105 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e3ac69430f39..d266f93ff2e4 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,53 +1026,6 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) -(define_insn_and_split "zero_extendditi2" - [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa,?&wa") - (zero_extend:TI -(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z,wa")))] - "TARGET_DIRECT_MOVE_64BIT" - "@ - # - # - mtvsrdd %x0,0,%1 - lxvrdx %x0,%y1 - #" - "&& reload_completed - && (int_reg_operand (operands[0], TImode) - || (vsx_register_operand (operands[0], TImode) - && vsx_register_operand (operands[1], DImode)))" - [(set (match_dup 2) - (match_dup 3)) - (set (match_dup 4) - (match_dup 5))] -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - - if (int_reg_operand (op0, TImode)) -{ - operands[2] = gen_lowpart (DImode, op0); - operands[3] = op1; - operands[4] = gen_highpart (DImode, op0); - operands[5] = const0_rtx; -} - else -{ - int op0_r = reg_or_subregno (op0); - rtx op0_di = gen_rtx_REG (DImode, op0_r); - rtx op0_v2di = gen_rtx_REG (V2DImode, op0_r); - rtx lo = WORDS_BIG_ENDIAN ? op0_di : op1; - rtx hi = WORDS_BIG_ENDIAN ? 
op1 : op0_di; - - operands[2] = op0_v2di; - operands[3] = CONST0_RTX (V2DImode); - operands[4] = op0_v2di; - operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo); -} -} - [(set_attr "type" "*,load,mtvsr,vecload,vecperm") - (set_attr "length" "8,8,*,*,8") - (set_attr "isa" "*,*,*,p10,*")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c deleted file mode 100644 index 80155cff0b9e.. --- a/gcc/testsuite/gcc.target/powerpc/pr108958.c +++ /dev/null @@ -1,58 +0,0 @@ -/* { dg-do compile } */ -/* { dg-require-effective-target int128 } */ -/* { dg-require-effective-target lp64 } */ -/* { dg-require-effective-target power10_ok } */ -/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ - -/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ - -void -arg_to_vsx (unsigned long long x, __uint128_t *p) -{ - /* mtvsrdd vsx,0,gpr. */ - __uint128_t y = x; - __asm__ (" # %x0" : "+wa" (y)); - *p = y; -} - -void -mem_to_vsx (unsigned long long *p, __uint128_t *q) -{ - /* lxrdx vsx,0,ptr. */ - __uint128_t y = *p; - __asm__ (" # %x0" : "+wa" (y)); - *q = y; -} - - -void -vsx_to_vsx (double d, __uint128_t *p) -{ - /* xxspltib + xxpermdir. */ - unsigned long long ull = d; - __uint128_t x = ull; - __asm__ (" # %x0" : "+wa" (x)); - *p = x; -} - -void -arg_to_gpr (unsigned long long x, __uint128_t *p) -{ - /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ - __uint128_t y = x; - __asm__ (" # %0" : "+r" (y)); - *p = y; -} - -void -mem_to_gpr (unsigned long long *p, __uint128_t *q) -{ - /* ld gpr1_lo,addr; li gpr1_hi,0. */ - __uint128_t y = *p; - __asm__ (" # %0" : "+r" (y)); - *q = y; -} - -/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mlxrdx\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mxxpermdi\M}1 } } */
[gcc(refs/users/meissner/heads/work186-bugs)] Revert changes
https://gcc.gnu.org/g:76887290ab16f09902023939705489cb7a63b8be commit 76887290ab16f09902023939705489cb7a63b8be Author: Michael Meissner Date: Sun Nov 17 19:05:35 2024 -0500 Revert changes Diff: --- gcc/config/rs6000/rs6000.md | 47 --- gcc/testsuite/gcc.target/powerpc/pr108958.c | 58 - 2 files changed, 105 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e3ac69430f39..d266f93ff2e4 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,53 +1026,6 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) -(define_insn_and_split "zero_extendditi2" - [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa,?&wa") - (zero_extend:TI -(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z,wa")))] - "TARGET_DIRECT_MOVE_64BIT" - "@ - # - # - mtvsrdd %x0,0,%1 - lxvrdx %x0,%y1 - #" - "&& reload_completed - && (int_reg_operand (operands[0], TImode) - || (vsx_register_operand (operands[0], TImode) - && vsx_register_operand (operands[1], DImode)))" - [(set (match_dup 2) - (match_dup 3)) - (set (match_dup 4) - (match_dup 5))] -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - - if (int_reg_operand (op0, TImode)) -{ - operands[2] = gen_lowpart (DImode, op0); - operands[3] = op1; - operands[4] = gen_highpart (DImode, op0); - operands[5] = const0_rtx; -} - else -{ - int op0_r = reg_or_subregno (op0); - rtx op0_di = gen_rtx_REG (DImode, op0_r); - rtx op0_v2di = gen_rtx_REG (V2DImode, op0_r); - rtx lo = WORDS_BIG_ENDIAN ? op0_di : op1; - rtx hi = WORDS_BIG_ENDIAN ? 
op1 : op0_di; - - operands[2] = op0_v2di; - operands[3] = CONST0_RTX (V2DImode); - operands[4] = op0_v2di; - operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo); -} -} - [(set_attr "type" "*,load,mtvsr,vecload,vecperm") - (set_attr "length" "8,8,*,*,8") - (set_attr "isa" "*,*,*,p10,*")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c deleted file mode 100644 index 863b3dbabc79.. --- a/gcc/testsuite/gcc.target/powerpc/pr108958.c +++ /dev/null @@ -1,58 +0,0 @@ -/* { dg-do compile } */ -/* { dg-require-effective-target int128 } */ -/* { dg-require-effective-target lp64 } */ -/* { dg-require-effective-target power10_ok } */ -/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ - -/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ - -void -arg_to_vsx (unsigned long long x, __uint128_t *p) -{ - /* mtvsrdd vsx,0,gpr. */ - __uint128_t y = x; - __asm__ (" # %x0" : "+wa" (y)); - *p = y; -} - -void -mem_to_vsx (unsigned long long *p, __uint128_t *q) -{ - /* lxrdx vsx,0,ptr. */ - __uint128_t y = *p; - __asm__ (" # %x0" : "+wa" (y)); - *q = y; -} - - -void -vsx_to_vsx (double d, __uint128_t *p) -{ - /* xxspltib + xxpermdir. */ - unsigned long long ull = d; - __uint128_t x = ull; - __asm__ (" # %x0" : "+wa" (x)); - *p = x; -} - -void -arg_to_gpr (unsigned long long x, __uint128_t *p) -{ - /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ - __uint128_t y = x; - __asm__ (" # %0" : "+r" (y)); - *p = y; -} - -void -mem_to_gpr (unsigned long long *p, __uint128_t *q) -{ - /* ld gpr1_lo,addr; li gpr1_hi,0. */ - __uint128_t y = *p; - __asm__ (" # %0" : "+r" (y)); - *q = y; -} - -/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mxxpermdi\M}1 } } */
[gcc(refs/users/meissner/heads/work186-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
https://gcc.gnu.org/g:83e485e74b736938063a8b9edc95255f78d633cc commit 83e485e74b736938063a8b9edc95255f78d633cc Author: Michael Meissner Date: Sun Nov 17 19:11:17 2024 -0500 PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode Previously GCC would zero extend a DImode GPR value to TImode by first zero extending the DImode value into a GPR TImode value, and then do a MTVSRDD to move this value to a VSX register. This patch does the move directly, since if the middle argument to MTVSRDD is 0, it does the zero extend. This patch also generates LXVRDX if the DImode value is in memory. Finally, if the DImode is already in a vector register, it does a XXSPLTIB and XXPERMDI to get the value into the bottom 64-bits of the register. I have built GCC with the patches in this patch set applied on both little and big endian PowerPC systems and there were no regressions. Can I apply this patch to GCC 15? 2024-11-17 Michael Meissner gcc/ * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn. gcc/testsuite/ * gcc.target/powerpc/pr108958.c: New test. 
Diff: --- gcc/config/rs6000/rs6000.md | 25 +++ gcc/testsuite/gcc.target/powerpc/pr108958.c | 47 + 2 files changed, 72 insertions(+) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d266f93ff2e4..ccd98676d878 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,6 +1026,31 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) +(define_insn_and_split "zero_extendditi2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa") + (zero_extend:TI +(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z")))] + "TARGET_DIRECT_MOVE_64BIT" + "@ + # + # + mtvsrdd %x0,0,%1 + lxvrdx %x0,%y1" + "&& reload_completed + && (int_reg_operand (operands[0], TImode) + || (vsx_register_operand (operands[0], TImode) + && vsx_register_operand (operands[1], DImode)))" + [(set (match_dup 2) + (match_dup 1)) + (set (match_dup 3) + (const_int 0))] +{ + operands[2] = gen_lowpart (DImode, operands[0]); + operands[3] = gen_highpart (DImode, operands[0]); +} + [(set_attr "type" "*,load,mtvsr,vecload") + (set_attr "length" "8,8,*,*") + (set_attr "isa" "*,*,*,p10")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c new file mode 100644 index ..aa79dc880c8e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ + +void +arg_to_vsx (unsigned long long x, __uint128_t *p) +{ + /* mtvsrdd vsx,0,gpr. */ + __uint128_t y = x; + __asm__ (" # %x0" : "+wa" (y)); + *p = y; +} + +void +mem_to_vsx (unsigned long long *p, __uint128_t *q) +{ + /* lxrdx vsx,0,ptr. 
*/ + __uint128_t y = *p; + __asm__ (" # %x0" : "+wa" (y)); + *q = y; +} + + +void +arg_to_gpr (unsigned long long x, __uint128_t *p) +{ + /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ + __uint128_t y = x; + __asm__ (" # %0" : "+r" (y)); + *p = y; +} + +void +mem_to_gpr (unsigned long long *p, __uint128_t *q) +{ + /* ld gpr1_lo,addr; li gpr1_hi,0. */ + __uint128_t y = *p; + __asm__ (" # %0" : "+r" (y)); + *q = y; +} + +/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */
[gcc(refs/users/meissner/heads/work186-bugs)] Revert changes
https://gcc.gnu.org/g:061572fae4b52aeb49cf46b5b054368f20fa0dd4 commit 061572fae4b52aeb49cf46b5b054368f20fa0dd4 Author: Michael Meissner Date: Sun Nov 17 19:20:27 2024 -0500 Revert changes Diff: --- gcc/config/rs6000/rs6000.md | 25 --- gcc/testsuite/gcc.target/powerpc/pr108958.c | 47 - 2 files changed, 72 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index ccd98676d878..d266f93ff2e4 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,31 +1026,6 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) -(define_insn_and_split "zero_extendditi2" - [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa") - (zero_extend:TI -(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z")))] - "TARGET_DIRECT_MOVE_64BIT" - "@ - # - # - mtvsrdd %x0,0,%1 - lxvrdx %x0,%y1" - "&& reload_completed - && (int_reg_operand (operands[0], TImode) - || (vsx_register_operand (operands[0], TImode) - && vsx_register_operand (operands[1], DImode)))" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 3) - (const_int 0))] -{ - operands[2] = gen_lowpart (DImode, operands[0]); - operands[3] = gen_highpart (DImode, operands[0]); -} - [(set_attr "type" "*,load,mtvsr,vecload") - (set_attr "length" "8,8,*,*") - (set_attr "isa" "*,*,*,p10")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c deleted file mode 100644 index aa79dc880c8e.. --- a/gcc/testsuite/gcc.target/powerpc/pr108958.c +++ /dev/null @@ -1,47 +0,0 @@ -/* { dg-do compile } */ -/* { dg-require-effective-target int128 } */ -/* { dg-require-effective-target lp64 } */ -/* { dg-require-effective-target power10_ok } */ -/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ - -/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ - -void -arg_to_vsx (unsigned long long x, __uint128_t *p) -{ - /* mtvsrdd vsx,0,gpr. 
*/ - __uint128_t y = x; - __asm__ (" # %x0" : "+wa" (y)); - *p = y; -} - -void -mem_to_vsx (unsigned long long *p, __uint128_t *q) -{ - /* lxrdx vsx,0,ptr. */ - __uint128_t y = *p; - __asm__ (" # %x0" : "+wa" (y)); - *q = y; -} - - -void -arg_to_gpr (unsigned long long x, __uint128_t *p) -{ - /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ - __uint128_t y = x; - __asm__ (" # %0" : "+r" (y)); - *p = y; -} - -void -mem_to_gpr (unsigned long long *p, __uint128_t *q) -{ - /* ld gpr1_lo,addr; li gpr1_hi,0. */ - __uint128_t y = *p; - __asm__ (" # %0" : "+r" (y)); - *q = y; -} - -/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */
[gcc(refs/users/meissner/heads/work186-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
https://gcc.gnu.org/g:8ebc82fff91b49020384d256b7ae3a4dec9aa893 commit 8ebc82fff91b49020384d256b7ae3a4dec9aa893 Author: Michael Meissner Date: Sun Nov 17 19:22:29 2024 -0500 PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode Previously GCC would zero extend a DImode GPR value to TImode by first zero extending the DImode value into a GPR TImode value, and then do a MTVSRDD to move this value to a VSX register. This patch does the move directly, since if the middle argument to MTVSRDD is 0, it does the zero extend. This patch also generates LXVRDX if the DImode value is in memory. Finally, if the DImode is already in a vector register, it does a XXSPLTIB and XXPERMDI to get the value into the bottom 64-bits of the register. I have built GCC with the patches in this patch set applied on both little and big endian PowerPC systems and there were no regressions. Can I apply this patch to GCC 15? 2024-11-17 Michael Meissner gcc/ * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn. gcc/testsuite/ * gcc.target/powerpc/pr108958.c: New test. 
Diff: --- gcc/config/rs6000/rs6000.md | 22 ++ gcc/testsuite/gcc.target/powerpc/pr108958.c | 47 + 2 files changed, 69 insertions(+) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d266f93ff2e4..914026b6bb8d 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,6 +1026,28 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) +(define_insn_and_split "zero_extendditi2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa") + (zero_extend:TI +(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z")))] + "TARGET_DIRECT_MOVE_64BIT" + "@ + # + # + mtvsrdd %x0,0,%1 + lxvrdx %x0,%y1" + "&& reload_completed && int_reg_operand (operands[0], TImode)" + [(set (match_dup 2) + (match_dup 1)) + (set (match_dup 3) + (const_int 0))] +{ + operands[2] = gen_lowpart (DImode, operands[0]); + operands[3] = gen_highpart (DImode, operands[0]); +} + [(set_attr "type" "*,load,mtvsr,vecload") + (set_attr "length" "8,8,*,*") + (set_attr "isa" "*,*,*,p10")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c new file mode 100644 index ..aa79dc880c8e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ + +void +arg_to_vsx (unsigned long long x, __uint128_t *p) +{ + /* mtvsrdd vsx,0,gpr. */ + __uint128_t y = x; + __asm__ (" # %x0" : "+wa" (y)); + *p = y; +} + +void +mem_to_vsx (unsigned long long *p, __uint128_t *q) +{ + /* lxrdx vsx,0,ptr. 
*/ + __uint128_t y = *p; + __asm__ (" # %x0" : "+wa" (y)); + *q = y; +} + + +void +arg_to_gpr (unsigned long long x, __uint128_t *p) +{ + /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ + __uint128_t y = x; + __asm__ (" # %0" : "+r" (y)); + *p = y; +} + +void +mem_to_gpr (unsigned long long *p, __uint128_t *q) +{ + /* ld gpr1_lo,addr; li gpr1_hi,0. */ + __uint128_t y = *p; + __asm__ (" # %0" : "+r" (y)); + *q = y; +} + +/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */
[gcc r14-10937] i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418]
https://gcc.gnu.org/g:8b4bb54e6c45411845ec559c49f594a6239c3969 commit r14-10937-g8b4bb54e6c45411845ec559c49f594a6239c3969 Author: Hu, Lin1 Date: Wed Nov 6 15:42:13 2024 +0800 i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418] -maddress-mode=long makes Pmode = DImode, so zero extend the 32-bit address to 64-bit and use a 64-bit register as a pointer to avoid raising an ICE. gcc/ChangeLog: PR target/117418 * config/i386/i386-expand.cc (ix86_expand_builtin): Convert pointer's mode according to Pmode. gcc/testsuite/ChangeLog: PR target/117418 * gcc.target/i386/pr117418-1.c: New test. (cherry picked from commit 2272cd2508f1854c880082f792de15e76ec09a99) Diff: --- gcc/config/i386/i386-expand.cc | 12 gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 2 files changed, 36 insertions(+) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 7019116fcac1..8e9dde145cfb 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -13477,6 +13477,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, op1 = expand_normal (arg1); op2 = expand_normal (arg2); + if (GET_MODE (op1) != Pmode) + op1 = convert_to_mode (Pmode, op1, 1); + if (!address_operand (op2, VOIDmode)) { op2 = convert_memory_address (Pmode, op2); @@ -13512,6 +13515,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, emit_label (ok_label); emit_insn (gen_rtx_SET (target, pat)); + if (GET_MODE (op0) != Pmode) + op0 = convert_to_mode (Pmode, op0, 1); + for (i = 0; i < 8; i++) { op = gen_rtx_MEM (V2DImode, @@ -13536,6 +13542,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, op0); + if (GET_MODE (op2) != Pmode) + op2 = convert_to_mode (Pmode, op2, 1); + op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); @@ -13573,6 +13582,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, 
op0); + if (GET_MODE (op3) != Pmode) + op3 = convert_to_mode (Pmode, op3, 1); + /* Force to use xmm0, xmm1 for keylow, keyhi*/ op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c b/gcc/testsuite/gcc.target/i386/pr117418-1.c new file mode 100644 index ..4839b139b79a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c @@ -0,0 +1,24 @@ +/* PR target/117418 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey128" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey256" 1 } } */ + +typedef __attribute__((__vector_size__(16))) long long V; +V a; + +void +foo() +{ +__builtin_ia32_aesdec128kl_u8 (&a, a, &a); +__builtin_ia32_aesdec256kl_u8 (&a, a, &a); +__builtin_ia32_aesenc128kl_u8 (&a, a, &a); +__builtin_ia32_aesenc256kl_u8 (&a, a, &a); +__builtin_ia32_encodekey128_u32 (0, a, &a); +__builtin_ia32_encodekey256_u32 (0, a, a, &a); +}
[gcc(refs/users/meissner/heads/work186-bugs)] Revert changes
https://gcc.gnu.org/g:f3059f83d42d39f52845365a4a9564434b9743ec commit f3059f83d42d39f52845365a4a9564434b9743ec Author: Michael Meissner Date: Sun Nov 17 20:16:20 2024 -0500 Revert changes Diff: --- gcc/config/rs6000/rs6000.md | 22 -- gcc/testsuite/gcc.target/powerpc/pr108958.c | 47 - 2 files changed, 69 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 914026b6bb8d..d266f93ff2e4 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,28 +1026,6 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) -(define_insn_and_split "zero_extendditi2" - [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa") - (zero_extend:TI -(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z")))] - "TARGET_DIRECT_MOVE_64BIT" - "@ - # - # - mtvsrdd %x0,0,%1 - lxvrdx %x0,%y1" - "&& reload_completed && int_reg_operand (operands[0], TImode)" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 3) - (const_int 0))] -{ - operands[2] = gen_lowpart (DImode, operands[0]); - operands[3] = gen_highpart (DImode, operands[0]); -} - [(set_attr "type" "*,load,mtvsr,vecload") - (set_attr "length" "8,8,*,*") - (set_attr "isa" "*,*,*,p10")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c deleted file mode 100644 index aa79dc880c8e.. --- a/gcc/testsuite/gcc.target/powerpc/pr108958.c +++ /dev/null @@ -1,47 +0,0 @@ -/* { dg-do compile } */ -/* { dg-require-effective-target int128 } */ -/* { dg-require-effective-target lp64 } */ -/* { dg-require-effective-target power10_ok } */ -/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ - -/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ - -void -arg_to_vsx (unsigned long long x, __uint128_t *p) -{ - /* mtvsrdd vsx,0,gpr. 
*/ - __uint128_t y = x; - __asm__ (" # %x0" : "+wa" (y)); - *p = y; -} - -void -mem_to_vsx (unsigned long long *p, __uint128_t *q) -{ - /* lxrdx vsx,0,ptr. */ - __uint128_t y = *p; - __asm__ (" # %x0" : "+wa" (y)); - *q = y; -} - - -void -arg_to_gpr (unsigned long long x, __uint128_t *p) -{ - /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ - __uint128_t y = x; - __asm__ (" # %0" : "+r" (y)); - *p = y; -} - -void -mem_to_gpr (unsigned long long *p, __uint128_t *q) -{ - /* ld gpr1_lo,addr; li gpr1_hi,0. */ - __uint128_t y = *p; - __asm__ (" # %0" : "+r" (y)); - *q = y; -} - -/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */
[gcc(refs/users/meissner/heads/work186-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
https://gcc.gnu.org/g:3666adaf055886784b847a89690d2b156ce70302 commit 3666adaf055886784b847a89690d2b156ce70302 Author: Michael Meissner Date: Sun Nov 17 18:41:44 2024 -0500 PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode Previously GCC would zero extend a DImode GPR value to TImode by first zero extending the DImode value into a GPR TImode value, and then do a MTVSRDD to move this value to a VSX register. This patch does the move directly, since if the middle argument to MTVSRDD is 0, it does the zero extend. This patch also generates LXVRDX if the DImode value is in memory. Finally, if the DImode is already in a vector register, it does a XXSPLTIB and XXPERMDI to get the value into the bottom 64-bits of the register. I have built GCC with the patches in this patch set applied on both little and big endian PowerPC systems and there were no regressions. Can I apply this patch to GCC 15? 2024-11-17 Michael Meissner gcc/ * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn. gcc/testsuite/ * gcc.target/powerpc/pr108958.c: New test. 
Diff: --- gcc/config/rs6000/rs6000.md | 47 +++ gcc/testsuite/gcc.target/powerpc/pr108958.c | 58 + 2 files changed, 105 insertions(+) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d266f93ff2e4..e3ac69430f39 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,6 +1026,53 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) +(define_insn_and_split "zero_extendditi2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa,?&wa") + (zero_extend:TI +(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z,wa")))] + "TARGET_DIRECT_MOVE_64BIT" + "@ + # + # + mtvsrdd %x0,0,%1 + lxvrdx %x0,%y1 + #" + "&& reload_completed + && (int_reg_operand (operands[0], TImode) + || (vsx_register_operand (operands[0], TImode) + && vsx_register_operand (operands[1], DImode)))" + [(set (match_dup 2) + (match_dup 3)) + (set (match_dup 4) + (match_dup 5))] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + if (int_reg_operand (op0, TImode)) +{ + operands[2] = gen_lowpart (DImode, op0); + operands[3] = op1; + operands[4] = gen_highpart (DImode, op0); + operands[5] = const0_rtx; +} + else +{ + int op0_r = reg_or_subregno (op0); + rtx op0_di = gen_rtx_REG (DImode, op0_r); + rtx op0_v2di = gen_rtx_REG (V2DImode, op0_r); + rtx lo = WORDS_BIG_ENDIAN ? op0_di : op1; + rtx hi = WORDS_BIG_ENDIAN ? 
op1 : op0_di; + + operands[2] = op0_v2di; + operands[3] = CONST0_RTX (V2DImode); + operands[4] = op0_v2di; + operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo); +} +} + [(set_attr "type" "*,load,mtvsr,vecload,vecperm") + (set_attr "length" "8,8,*,*,8") + (set_attr "isa" "*,*,*,p10,*")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c new file mode 100644 index ..863b3dbabc79 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ + +void +arg_to_vsx (unsigned long long x, __uint128_t *p) +{ + /* mtvsrdd vsx,0,gpr. */ + __uint128_t y = x; + __asm__ (" # %x0" : "+wa" (y)); + *p = y; +} + +void +mem_to_vsx (unsigned long long *p, __uint128_t *q) +{ + /* lxrdx vsx,0,ptr. */ + __uint128_t y = *p; + __asm__ (" # %x0" : "+wa" (y)); + *q = y; +} + + +void +vsx_to_vsx (double d, __uint128_t *p) +{ + /* xxspltib + xxpermdir. */ + unsigned long long ull = d; + __uint128_t x = ull; + __asm__ (" # %x0" : "+wa" (x)); + *p = x; +} + +void +arg_to_gpr (unsigned long long x, __uint128_t *p) +{ + /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ + __uint128_t y = x; + __asm__ (" # %0" : "+r" (y)); + *p = y; +} + +void +mem_to_gpr (unsigned long long *p, __uint128_t *q) +{ + /* ld gpr1_lo,addr; li gpr1_hi,0. */ + __uint128_t y = *p; + __asm__ (" # %0" : "+r" (y)); + *q = y; +} + +/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlxvrdx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M}1 } } */
[gcc r15-5374] Alpha: Remove leftover `;;' for "unaligned_store"
https://gcc.gnu.org/g:4a8eb5c6d87f3a1ccdf6eb248e6a7dd4cffbb7d4 commit r15-5374-g4a8eb5c6d87f3a1ccdf6eb248e6a7dd4cffbb7d4 Author: Maciej W. Rozycki Date: Mon Nov 18 03:02:59 2024 + Alpha: Remove leftover `;;' for "unaligned_store" Remove stray `;;' from the middle of the introductory comment for the "unaligned_store" expander, clearly a leftover from a previous edition. gcc/ * config/alpha/alpha.md (unaligned_store): Remove stray `;;'. Diff: --- gcc/config/alpha/alpha.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index bd92392878e2..e57a9d31e013 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -4201,7 +4201,7 @@ }) ;; For the unaligned byte and halfword cases, we use code similar to that -;; in the ;; Architecture book, but reordered to lower the number of registers +;; in the Architecture book, but reordered to lower the number of registers ;; required. Operand 0 is the address. Operand 1 is the data to store. ;; Operands 2, 3, and 4 are DImode temporaries, where operands 2 and 4 may ;; be the same temporary, if desired. If the address is in a register,
[gcc r15-5363] libstdc++: Update link to Angelika Langer's book
https://gcc.gnu.org/g:6d52a568ddde63cea805ad08728a837ccd93bb8f commit r15-5363-g6d52a568ddde63cea805ad08728a837ccd93bb8f Author: Gerald Pfeifer Date: Sun Nov 17 12:05:36 2024 +0100 libstdc++: Update link to Angelika Langer's book libstdc++-v3: * doc/xml/manual/io.xml: Update link to Angelika Langer's book. * doc/html/manual/streambufs.html: Regenerate. Diff: --- libstdc++-v3/doc/html/manual/streambufs.html | 2 +- libstdc++-v3/doc/xml/manual/io.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/doc/html/manual/streambufs.html b/libstdc++-v3/doc/html/manual/streambufs.html index d3c592177f08..e299d70cb0f1 100644 --- a/libstdc++-v3/doc/html/manual/streambufs.html +++ b/libstdc++-v3/doc/html/manual/streambufs.html @@ -6,7 +6,7 @@ Creating your own stream buffers for I/O can be remarkably easy. If you are interested in doing so, we highly recommend two very excellent books: - http://www.angelikalanger.com/iostreams.html"; target="_top">Standard C++ + https://angelikalanger.com/iostreams.html"; target="_top">Standard C++ IOStreams and Locales by Langer and Kreft, ISBN 0-201-18395-1, and http://www.josuttis.com/libbook/"; target="_top">The C++ Standard Library by Nicolai Josuttis, ISBN 0-201-37926-0. Both are published by diff --git a/libstdc++-v3/doc/xml/manual/io.xml b/libstdc++-v3/doc/xml/manual/io.xml index 8380683561a6..d6fe2ce841d5 100644 --- a/libstdc++-v3/doc/xml/manual/io.xml +++ b/libstdc++-v3/doc/xml/manual/io.xml @@ -190,7 +190,7 @@ Creating your own stream buffers for I/O can be remarkably easy. 
If you are interested in doing so, we highly recommend two very excellent books: - http://www.w3.org/1999/xlink"; xlink:href="http://www.angelikalanger.com/iostreams.html";>Standard C++ + http://www.w3.org/1999/xlink"; xlink:href="https://angelikalanger.com/iostreams.html";>Standard C++ IOStreams and Locales by Langer and Kreft, ISBN 0-201-18395-1, and http://www.w3.org/1999/xlink"; xlink:href="http://www.josuttis.com/libbook/";>The C++ Standard Library by Nicolai Josuttis, ISBN 0-201-37926-0. Both are published by
[gcc r15-5366] libstdc++: add module std [PR106852]
https://gcc.gnu.org/g:7db55c0ba1baaf0e323ef7f9ef8c9cda077d40e9 commit r15-5366-g7db55c0ba1baaf0e323ef7f9ef8c9cda077d40e9 Author: Jason Merrill Date: Thu Oct 10 17:27:50 2024 -0400 libstdc++: add module std [PR106852] This patch introduces an installed source form of module std and std.compat. To help a build system find them, we install a libstdc++.modules.json file alongside libstdc++.so, which tells the build system where the files are and any special flags it should use when compiling them (none, in this case). The format is from a proposal in SG15. The build system can find this file with 'gcc -print-file-name=libstdc++.modules.json'. It seems preferable to use a relative path from this file to the sources so that moving the installation doesn't break the reference, but I didn't see any obvious way to compute that without relying on coreutils, perl, or python, so I wrote a POSIX shell script for it. The .. canonicalization bits aren't necessary since I discovered $(abspath), but I guess I might as well leave them in. Currently this installs the sources under $(gxx_include_dir)/bits/, i.e. /usr/include/c++/15/bits. So with my -fsearch-include-path change, std.cc can be compiled with g++ -fsearch-include-path bits/std.cc. Note that if someone actually tries to #include <bits/std.cc> it will fail with "error: module control-line cannot be in included file". Any ideas about a more user-friendly way to express "compile module std" are welcome. The sources currently have the extension .cc, like other source files. std.cc started with m.cencora's implementation in PR114600. I've made some adjustments, but more is probably desirable, e.g. of the handling of namespace ranges, and to remove exports of templates that are only specialized in a particular header. I've filled in a bunch of missing exports, and added some FIXMEs where I noticed bits that are not implemented yet. Since bits/stdc++.h also intends to include the whole standard library, I include it rather than duplicate it. 
But stdc++.h comments out <execution>, due to TBB issues; I include it separately and suppress TBB usage, so module std won't currently provide parallel execution. It seemed most convenient for the two files to be monolithic so we don't need to worry about include paths. So the C library names that module std.compat exports in both namespace std and :: are a block of code that is appended to both files, adjusted based on whether the macro STD_COMPAT is defined before the block. In this implementation std.compat imports std; it would also be valid for it to duplicate everything in std. I see the libc++ std.compat also imports std. As discussed in the PR, module std is supported in C++20 mode even though it was added in C++23. Changes to test module std will follow in a separate patch. In my testing I've noticed a few compiler bugs that break various testcases, so I don't expect to enable module std testing by default at first. PR libstdc++/106852 libstdc++-v3/ChangeLog: * include/bits/version.def: Add __cpp_lib_modules. * include/bits/version.h: Regenerate. * src/c++23/Makefile.am: Add modules std and std.compat. * src/c++23/Makefile.in: Regenerate. * src/c++23/std-clib.cc.in: New file. * src/c++23/std.cc.in: New file. * src/c++23/std.compat.cc.in: New file. * src/c++23/libstdc++.modules.json.in: New file. contrib/ChangeLog: * relpath.sh: New file. 
Diff: --- libstdc++-v3/include/bits/version.h | 10 + libstdc++-v3/src/c++23/std-clib.cc.in| 676 + libstdc++-v3/src/c++23/std.cc.in | 3253 ++ libstdc++-v3/src/c++23/std.compat.cc.in | 29 + contrib/relpath.sh | 81 + libstdc++-v3/include/bits/version.def|9 + libstdc++-v3/src/c++23/Makefile.am | 34 + libstdc++-v3/src/c++23/Makefile.in | 144 +- libstdc++-v3/src/c++23/libstdc++.modules.json.in | 17 + 9 files changed, 4237 insertions(+), 16 deletions(-) diff --git a/libstdc++-v3/include/bits/version.h b/libstdc++-v3/include/bits/version.h index a0e3b36864b7..c556aca38fa8 100644 --- a/libstdc++-v3/include/bits/version.h +++ b/libstdc++-v3/include/bits/version.h @@ -2075,4 +2075,14 @@ #endif /* !defined(__cpp_lib_to_string) && defined(__glibcxx_want_to_string) */ #undef __glibcxx_want_to_string +#if !defined(__cpp_lib_modules) +# if (__cplusplus >= 202002L) && (__cpp_modules) +# define __glibcxx_modules 202207L +# if defined(__glibcxx_want_all) || defined(__glibcxx_want_modules) +# define __cpp_lib_modules 202207L +# endif +# endif +#endif /* !defined(
[gcc r15-5367] c-family: add -fsearch-include-path
https://gcc.gnu.org/g:dbfbd3aa2c1fb6293defcb1ad16099bb8aa4a0cb commit r15-5367-gdbfbd3aa2c1fb6293defcb1ad16099bb8aa4a0cb Author: Jason Merrill Date: Thu May 16 21:43:20 2024 -0400 c-family: add -fsearch-include-path The C++ modules code has a -fmodule-header (or -x c++-{user,system}-header) option to specify looking up headers to compile to header units on the usual include paths. I'd like to have the same functionality for full C++20 modules such as module std, which I proposed to live on the include path at bits/std.cc. But this behavior doesn't seem necessarily connected to modules, so I'm proposing a general C/C++ option to specify the behavior of looking in the include path for the input files specified on the command line. Other ideas for the name of the option are very welcome. The libcpp change is to allow -fsearch-include-path{,=user} to find files in the current working directory, like -include. This can be handy for a quick compile of both std.cc and a file that imports it, e.g. g++ -std=c++20 -fmodules -fsearch-include-path bits/std.cc importer.cc gcc/ChangeLog: * doc/cppopts.texi: Document -fsearch-include-path. * doc/invoke.texi: Mention it for modules. gcc/c-family/ChangeLog: * c.opt: Add -fsearch-include-path. * c-opts.cc (c_common_post_options): Handle it. gcc/cp/ChangeLog: * module.cc (module_preprocess_options): Don't override it. libcpp/ChangeLog: * internal.h (search_path_head): Declare. * files.cc (search_path_head): No longer static. * init.cc (cpp_read_main_file): Use it. 
Diff: --- gcc/doc/cppopts.texi | 15 +++ gcc/doc/invoke.texi| 5 + gcc/c-family/c.opt | 7 +++ libcpp/internal.h | 2 ++ gcc/c-family/c-opts.cc | 13 + gcc/cp/module.cc | 3 ++- libcpp/files.cc| 5 + libcpp/init.cc | 12 +--- 8 files changed, 50 insertions(+), 12 deletions(-) diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi index 5b5b0848ae86..748db5ea5797 100644 --- a/gcc/doc/cppopts.texi +++ b/gcc/doc/cppopts.texi @@ -270,6 +270,21 @@ When preprocessing, do not shorten system header paths with canonicalization. @item -fmax-include-depth=@var{depth} Set the maximum depth of the nested #include. The default is 200. +@opindex fsearch-include-path +@item -fsearch-include-path@r{[}=@var{kind}@r{]} +Look for input files on the #include path, not just the current +directory. This is particularly useful with C++20 modules, for which +both header units and module interface units need to be compiled +directly: + +@smallexample +g++ -c -std=c++20 -fmodules -fsearch-include-path bits/stdc++.h bits/std.cc +@end smallexample + +@var{kind} defaults to @samp{user}, which looks on the @code{#include +"@dots{}"} search path; you can also explicitly specify @samp{system} +for the @code{#include <@dots{}>} search path. + @opindex ftabstop @item -ftabstop=@var{width} Set the distance between tab stops. This helps the preprocessor report diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 929feaf23fbb..8aeccb1953a7 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -38001,6 +38001,11 @@ installed. Specifying the language as one of these variants also inhibits output of the object file, as header files have no associated object file. +Alternately, or for a module interface unit in an installed location, +you can use @option{-fsearch-include-path} to specify that the main +source file should be found on the include path rather than the +current directory. 
+ Header units can be used in much the same way as precompiled headers (@pxref{Precompiled Headers}), but with fewer restrictions: an #include that is translated to a header unit import can appear at any diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 61cfe33c2512..8224c82bfdf8 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -2280,6 +2280,13 @@ frtti C++ ObjC++ Optimization Var(flag_rtti) Init(1) Generate run time type descriptor information. +fsearch-include-path +C ObjC C++ ObjC++ +Look for the main source file on the include path. + +fsearch-include-path= +C++ ObjC++ Joined RejectNegative Undocumented + fshort-enums C ObjC C++ ObjC++ LTO Optimization Var(flag_short_enums) Use the narrowest integer type possible for enumeration types. diff --git a/libcpp/internal.h b/libcpp/internal.h index e65198e89daa..d91acd64ba39 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -766,6 +766,8 @@ extern _cpp_file *_cpp_find_file (cpp_reader *, const char *, cpp_dir *, int angle, _cpp_find_file_kind, location_t); extern bool _cpp_find_failed (_cpp_file *); extern void _cpp_mark_file_once_only (cpp_reader *, struct _cpp_file *); +extern cpp_dir *search_path_he
[gcc r15-5368] c++: -M and modules again
https://gcc.gnu.org/g:3e89a4d5138e585713604135d4c2601244055e82 commit r15-5368-g3e89a4d5138e585713604135d4c2601244055e82 Author: Jason Merrill Date: Sat Nov 16 15:25:04 2024 -0500 c++: -M and modules again While experimenting with testing module std I noticed that gcc -M broke on it; it seems I need to set directives_only even sooner than I did in r15-4219. gcc/c-family/ChangeLog: * c-ppoutput.cc (preprocess_file): Don't set directives_only here. gcc/cp/ChangeLog: * module.cc (module_preprocess_options): Set directives_only here. Diff: --- gcc/c-family/c-ppoutput.cc | 1 - gcc/cp/module.cc | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/c-family/c-ppoutput.cc b/gcc/c-family/c-ppoutput.cc index f6f83bdff00a..97ea864df141 100644 --- a/gcc/c-family/c-ppoutput.cc +++ b/gcc/c-family/c-ppoutput.cc @@ -95,7 +95,6 @@ preprocess_file (cpp_reader *pfile) if (flag_modules) { /* For macros from imported headers we need directives_only_cb. */ - cpp_get_options (pfile)->directives_only = true; scan_translation_unit_directives_only (pfile); } else diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 27eb39b48fa3..4136fdceb9f6 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -21224,6 +21224,8 @@ module_preprocess_options (cpp_reader *reader) } auto *opt = cpp_get_options (reader); opt->module_directives = true; + if (flag_no_output) + opt->directives_only = true; if (opt->main_search == CMS_none) opt->main_search = cpp_main_search (flag_header_unit); }
[gcc(refs/users/meissner/heads/work186-sha)] Add potential p-future XVRLD and XVRLDI instructions.
https://gcc.gnu.org/g:7ca461bf9cb3deedf9fe058d15de8f54c409766b commit 7ca461bf9cb3deedf9fe058d15de8f54c409766b Author: Michael Meissner Date: Sat Nov 16 02:14:19 2024 -0500 Add potential p-future XVRLD and XVRLDI instructions. 2024-11-16 Michael Meissner gcc/ * config/rs6000/altivec.md (altivec_vrl): Add support for a possible XVRLD instruction in the future. (altivec_vrl_immediate): New insns. * config/rs6000/predicates.md (vector_shift_immediate): New predicate. * config/rs6000/rs6000.h (TARGET_XVRLW): New macro. * config/rs6000/rs6000.md (isa attribute): Add xvrlw. (enabled attribute): Add support for xvrlw. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_powerpc_future_ok): New target. (check_effective_target_powerpc_dense_math_ok): Likewise. * gcc.target/powerpc/vector-rotate-left.c: New test. Diff: --- gcc/config/rs6000/altivec.md | 35 +++ gcc/config/rs6000/predicates.md | 26 ++ gcc/config/rs6000/rs6000.h| 3 +++ gcc/config/rs6000/rs6000.md | 6 +- gcc/testsuite/lib/target-supports.exp | 35 +++ 5 files changed, 100 insertions(+), 5 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 00dad4b91f1c..d4ee50322ca1 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1983,12 +1983,39 @@ } [(set_attr "type" "vecperm")]) +;; -mcpu=future adds a vector rotate left word variant. There is no vector +;; byte/half-word/double-word/quad-word rotate left. This insn occurs before +;; altivec_vrl and will match for -mcpu=future, while other cpus will +;; match the generic insn. +;; However for testing, allow other xvrl variants. In particular, XVRLD for +;; the sha3 tests for multibuf/singlebuf. 
(define_insn "altivec_vrl" - [(set (match_operand:VI2 0 "register_operand" "=v") -(rotate:VI2 (match_operand:VI2 1 "register_operand" "v") - (match_operand:VI2 2 "register_operand" "v")))] + [(set (match_operand:VI2 0 "register_operand" "=v,wa") +(rotate:VI2 (match_operand:VI2 1 "register_operand" "v,wa") + (match_operand:VI2 2 "register_operand" "v,wa")))] "" - "vrl %0,%1,%2" + "@ + vrl %0,%1,%2 + xvrl %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "isa" "*,xvrlw")]) + +(define_insn "*altivec_vrl_immediate" + [(set (match_operand:VI2 0 "register_operand" "=wa,wa,wa,wa") + (rotate:VI2 (match_operand:VI2 1 "register_operand" "wa,wa,wa,wa") + (match_operand:VI2 2 "vector_shift_immediate" "j,wM,wE,wS")))] + "TARGET_XVRLW && " +{ + rtx op2 = operands[2]; + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op2, mode, &num_insns, &value)) +gcc_unreachable (); + + operands[3] = GEN_INT (value & 0xff); + return "xvrli %x0,%x1,%3"; +} [(set_attr "type" "vecsimple")]) (define_insn "altivec_vrlq" diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 1d95e34557e5..fccfbd7e4904 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -728,6 +728,32 @@ return num_insns == 1; }) +;; Return 1 if the operand is a CONST_VECTOR whose elements are all the +;; same and the elements can be an immediate shift or rotate factor +(define_predicate "vector_shift_immediate" + (match_code "const_vector,vec_duplicate,const_int") +{ + int value = 256; + int num_insns = -1; + + if (zero_constant (op, mode) || all_ones_constant (op, mode)) +return true; + + if (!xxspltib_constant_p (op, mode, &num_insns, &value)) +return false; + + switch (mode) +{ +case V16QImode: return IN_RANGE (value, 0, 7); +case V8HImode: return IN_RANGE (value, 0, 15); +case V4SImode: return IN_RANGE (value, 0, 31); +case V2DImode: return IN_RANGE (value, 0, 63); +default:break; +} + + return false; +}) + ;; Return 1 if the operand is 
a CONST_VECTOR and can be loaded into a ;; vector register without using memory. (define_predicate "easy_vector_constant" diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index f95318dd5536..576c7ae66bb4 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -578,6 +578,9 @@ extern int rs6000_vector_align[]; below. */ #define RS6000_FN_TARGET_INFO_HTM 1 +/* Whether we have XVRLW support. */ +#define TARGET_XVRLW TARGET_FUTURE + /* Whether the various reciprocal divide/square root estimate instructions exist, and whether we should automatically generate code for the instruction by default. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 6a2268f9e044..62b45a6bd613 100644 --- a/gcc/c
[gcc r15-5359] Avoid explicit builtin list in tree-ssa-dce
https://gcc.gnu.org/g:cc33f880e553d1aa94d19a349ad755f34c33de9e commit r15-5359-gcc33f880e553d1aa94d19a349ad755f34c33de9e Author: Jan Hubicka Date: Sat Nov 16 23:45:57 2024 +0100 Avoid explicit builtin list in tree-ssa-dce While working on -fmalloc-dce I noticed that tree-ssa-dce.cc still has an outdated list of builtins that are known to not read memory that can be replaced by a query to fnspec and modref. If I get things right, dce does some dead store removal, but only on those memory objects that are non-aliased (automatic variables with no address taken) and for all other memory addresses it resorts to mark_all_reaching_defs_necessary expecting DSE to do the rest. So we really want to only check if there are no memory reads at all rather than trying to understand them by parsing fnspec or modref summary. I did run testsuite ensuring that all builtins matched previously are still matched. There are few testcases where this check fails, due to type incompatibility. New code uses gimple_call_builtin while other just checked callee_decl. We test things like calling free() without parameter which I don't think we want to care about, but there is also a testcase declaring void * calloc (long, long) where builtin declaration expects unsigned long. I am not sure if this case should not be allowed by gimple_call_builtin? Bootstrapped/regtested x86_64-linux. OK? gcc/ChangeLog: * ipa-modref.cc (ipa_modref_callee_reads_no_memory_p): New function. * ipa-modref.h (ipa_modref_callee_reads_no_memory_p): Declare. * tree-ssa-dce.cc (propagate_necessity): Use it. 
Diff: --- gcc/ipa-modref.cc| 32 gcc/ipa-modref.h | 1 + gcc/testsuite/g++.dg/tree-ssa/pr109442.C | 12 gcc/tree-ssa-dce.cc | 28 +--- 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc index c1973aa36c4c..12ac0e7865a7 100644 --- a/gcc/ipa-modref.cc +++ b/gcc/ipa-modref.cc @@ -5600,4 +5600,36 @@ ipa_modref_cc_finalize () escape_summaries = NULL; } +/* Return true if call is known to perform no memory reads. */ + +bool +ipa_modref_callee_reads_no_memory_p (gcall *call) +{ + if (gimple_call_flags (call) & ECF_CONST) +return true; + attr_fnspec fnspec = gimple_call_fnspec (call); + if (fnspec.known_p () + && !fnspec.global_memory_read_p ()) +{ + bool found = false; + for (unsigned int i = 0; i < gimple_call_num_args (call) && !found; i++) + if (!POINTER_TYPE_P (TREE_TYPE (gimple_call_arg (call, i + ; + else if (!fnspec.arg_specified_p (i) + || fnspec.arg_maybe_read_p (i)) + found = true; + if (!found) + return true; +} + + /* For interposed calls we can not be sure that the other, semantically + equivalent body, will not perform some redundant load from memory + that may become undefined if we optimize out some stores. */ + bool interposed; + modref_summary *sum = get_modref_function_summary (call, &interposed); + if (sum && !interposed && !sum->global_memory_read && !sum->loads) +return true; + return false; +} + #include "gt-ipa-modref.h" diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h index 1bbe9bffee05..a0eb63a0afab 100644 --- a/gcc/ipa-modref.h +++ b/gcc/ipa-modref.h @@ -75,6 +75,7 @@ modref_summary *get_modref_function_summary (cgraph_node *func); modref_summary *get_modref_function_summary (gcall *call, bool *interposed); void ipa_modref_cc_finalize (); void ipa_merge_modref_summary_after_inlining (cgraph_edge *e); +bool ipa_modref_callee_reads_no_memory_p (gcall *call); /* All flags that are implied by the ECF_CONST functions. 
*/ static const int implicit_const_eaf_flags diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr109442.C b/gcc/testsuite/g++.dg/tree-ssa/pr109442.C new file mode 100644 index ..ec40c470c8dd --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr109442.C @@ -0,0 +1,12 @@ +// { dg-do compile { target c++11 } } +// { dg-options "-O1 -fdump-tree-optimized" } +#include +#define T int +T vat1(std::vector v1) { +auto v = v1; +return 10; +} +// This should compile to empty function; check that no size of +// vector is determined and there is no allocation +// { dg-final { scan-tree-dump-not "_M_start" "optimized" } } +// { dg-final { scan-tree-dump-not "delete" "optimized" } } diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc index 643a1efd8707..70e3843cabfd 100644 --- a/gcc/tree-ssa-dce.cc +++ b/gcc/tree-ssa-dce.cc @@ -69,6 +69,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-propagate.h" #include "gimple-fold.h" #include "tree-ssa.h" +#include "ipa-modref-tree.h" +#include "ipa-modref.h" static struct stmt_stats { @@
[gcc(refs/users/meissner/heads/work186-sha)] Add potential p-future XVRLD and XVRLDI instructions.
https://gcc.gnu.org/g:9a76736714f0cdb5bc7d9b93df4684df9857a1cc commit 9a76736714f0cdb5bc7d9b93df4684df9857a1cc Author: Michael Meissner Date: Sat Nov 16 02:18:12 2024 -0500 Add potential p-future XVRLD and XVRLDI instructions. 2024-11-16 Michael Meissner gcc/ * config/rs6000/altivec.md (altivec_vrl): Add support for a possible XVRLD instruction in the future. (altivec_vrl_immediate): New insns. * config/rs6000/predicates.md (vector_shift_immediate): New predicate. * config/rs6000/rs6000.h (TARGET_XVRLW): New macro. * config/rs6000/rs6000.md (isa attribute): Add xvrlw. (enabled attribute): Add support for xvrlw. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_powerpc_future_ok): New target. (check_effective_target_powerpc_dense_math_ok): Likewise. * gcc.target/powerpc/vector-rotate-left.c: New test. Diff: --- gcc/config/rs6000/altivec.md | 35 +++--- gcc/config/rs6000/predicates.md| 26 gcc/config/rs6000/rs6000.h | 3 ++ gcc/config/rs6000/rs6000.md| 6 +++- .../gcc.target/powerpc/vector-rotate-left.c| 34 + gcc/testsuite/lib/target-supports.exp | 35 ++ 6 files changed, 134 insertions(+), 5 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 00dad4b91f1c..d4ee50322ca1 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1983,12 +1983,39 @@ } [(set_attr "type" "vecperm")]) +;; -mcpu=future adds a vector rotate left word variant. There is no vector +;; byte/half-word/double-word/quad-word rotate left. This insn occurs before +;; altivec_vrl and will match for -mcpu=future, while other cpus will +;; match the generic insn. +;; However for testing, allow other xvrl variants. In particular, XVRLD for +;; the sha3 tests for multibuf/singlebuf. 
(define_insn "altivec_vrl" - [(set (match_operand:VI2 0 "register_operand" "=v") -(rotate:VI2 (match_operand:VI2 1 "register_operand" "v") - (match_operand:VI2 2 "register_operand" "v")))] + [(set (match_operand:VI2 0 "register_operand" "=v,wa") +(rotate:VI2 (match_operand:VI2 1 "register_operand" "v,wa") + (match_operand:VI2 2 "register_operand" "v,wa")))] "" - "vrl %0,%1,%2" + "@ + vrl %0,%1,%2 + xvrl %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "isa" "*,xvrlw")]) + +(define_insn "*altivec_vrl_immediate" + [(set (match_operand:VI2 0 "register_operand" "=wa,wa,wa,wa") + (rotate:VI2 (match_operand:VI2 1 "register_operand" "wa,wa,wa,wa") + (match_operand:VI2 2 "vector_shift_immediate" "j,wM,wE,wS")))] + "TARGET_XVRLW && " +{ + rtx op2 = operands[2]; + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op2, mode, &num_insns, &value)) +gcc_unreachable (); + + operands[3] = GEN_INT (value & 0xff); + return "xvrli %x0,%x1,%3"; +} [(set_attr "type" "vecsimple")]) (define_insn "altivec_vrlq" diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 1d95e34557e5..fccfbd7e4904 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -728,6 +728,32 @@ return num_insns == 1; }) +;; Return 1 if the operand is a CONST_VECTOR whose elements are all the +;; same and the elements can be an immediate shift or rotate factor +(define_predicate "vector_shift_immediate" + (match_code "const_vector,vec_duplicate,const_int") +{ + int value = 256; + int num_insns = -1; + + if (zero_constant (op, mode) || all_ones_constant (op, mode)) +return true; + + if (!xxspltib_constant_p (op, mode, &num_insns, &value)) +return false; + + switch (mode) +{ +case V16QImode: return IN_RANGE (value, 0, 7); +case V8HImode: return IN_RANGE (value, 0, 15); +case V4SImode: return IN_RANGE (value, 0, 31); +case V2DImode: return IN_RANGE (value, 0, 63); +default:break; +} + + return false; +}) + ;; Return 1 if the operand is 
a CONST_VECTOR and can be loaded into a ;; vector register without using memory. (define_predicate "easy_vector_constant" diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index f95318dd5536..576c7ae66bb4 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -578,6 +578,9 @@ extern int rs6000_vector_align[]; below. */ #define RS6000_FN_TARGET_INFO_HTM 1 +/* Whether we have XVRLW support. */ +#define TARGET_XVRLW TARGET_FUTURE + /* Whether the various reciprocal divide/square root estimate instructions exist, and whether we should automatically generate code for the instruction by default. */ diff --git a/gcc/
[gcc(refs/users/meissner/heads/work186-bugs)] Update ChangeLog.*
https://gcc.gnu.org/g:8480aca690649fef083daad6edeec9801846d485 commit 8480aca690649fef083daad6edeec9801846d485 Author: Michael Meissner Date: Sun Nov 17 17:16:30 2024 -0500 Update ChangeLog.* Diff: --- gcc/ChangeLog.bugs | 30 ++ 1 file changed, 30 insertions(+) diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs index ab49d7c52ae6..8060ee644ac2 100644 --- a/gcc/ChangeLog.bugs +++ b/gcc/ChangeLog.bugs @@ -1,3 +1,33 @@ + Branch work186-bugs, patch #502 + +PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode + +Previously GCC would zero externd a DImode GPR value to TImode by first zero +extending the DImode value into a GPR TImode value, and then do a MTVSRDD to +move this value to a VSX register. + +This patch does the move directly, since if the middle argument to MTVSRDD is 0, +it does the zero extend. + +This patch also generates LXVRDX if the DImode value is in memory. + +Finally, it the DImode is already in a vector register, it does a XXSPLTIB and +XXPERMDI to get the value into the bottom 64-bits of the register. + +I have built GCC with the patches in this patch set applied on both little and +big endian PowerPC systems and there were no regressions. Can I apply this +patch to GCC 15? + +2024-11-17 Michael Meissner + +gcc/ + + * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn. + +gcc/testsuite/ + + * gcc.target/powerpc/pr108958.c: New test. + Branch work186-bugs, patch #501 Add power9 and power10 float to logical optimizations.
[gcc(refs/users/meissner/heads/work186-bugs)] Revert changes
https://gcc.gnu.org/g:d57549e16e82e31dd1620ca3d532092f49376cfa commit d57549e16e82e31dd1620ca3d532092f49376cfa Author: Michael Meissner Date: Sun Nov 17 18:11:53 2024 -0500 Revert changes Diff: --- gcc/config/rs6000/rs6000.md | 47 gcc/testsuite/gcc.target/powerpc/pr108958.c | 55 - 2 files changed, 102 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e3ac69430f39..d266f93ff2e4 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,53 +1026,6 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) -(define_insn_and_split "zero_extendditi2" - [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa,?&wa") - (zero_extend:TI -(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z,wa")))] - "TARGET_DIRECT_MOVE_64BIT" - "@ - # - # - mtvsrdd %x0,0,%1 - lxvrdx %x0,%y1 - #" - "&& reload_completed - && (int_reg_operand (operands[0], TImode) - || (vsx_register_operand (operands[0], TImode) - && vsx_register_operand (operands[1], DImode)))" - [(set (match_dup 2) - (match_dup 3)) - (set (match_dup 4) - (match_dup 5))] -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - - if (int_reg_operand (op0, TImode)) -{ - operands[2] = gen_lowpart (DImode, op0); - operands[3] = op1; - operands[4] = gen_highpart (DImode, op0); - operands[5] = const0_rtx; -} - else -{ - int op0_r = reg_or_subregno (op0); - rtx op0_di = gen_rtx_REG (DImode, op0_r); - rtx op0_v2di = gen_rtx_REG (V2DImode, op0_r); - rtx lo = WORDS_BIG_ENDIAN ? op0_di : op1; - rtx hi = WORDS_BIG_ENDIAN ? 
op1 : op0_di; - - operands[2] = op0_v2di; - operands[3] = CONST0_RTX (V2DImode); - operands[4] = op0_v2di; - operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo); -} -} - [(set_attr "type" "*,load,mtvsr,vecload,vecperm") - (set_attr "length" "8,8,*,*,8") - (set_attr "isa" "*,*,*,p10,*")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c deleted file mode 100644 index 948ba327591b.. --- a/gcc/testsuite/gcc.target/powerpc/pr108958.c +++ /dev/null @@ -1,55 +0,0 @@ -/* { dg-do compile { int128 && lp64 && power10_ok } } */ -/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ - -/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ - -void -arg_to_vsx (unsigned long long x, __uint128_t *p) -{ - /* mtvsrdd vsx,0,gpr. */ - __uint128_t y = x; - __asm__ (" # %x0" : "+wa" (y)); - *p = y; -} - -void -mem_to_vsx (unsigned long long *p, __uint128_t *q) -{ - /* lxrdx vsx,0,ptr. */ - __uint128_t y = *p; - __asm__ (" # %x0" : "+wa" (y)); - *q = y; -} - - -void -vsx_to_vsx (double d, __uint128_t *p) -{ - /* xxspltib + xxpermdir. */ - unsigned long long ull = d; - __uint128_t x = ull; - __asm__ (" # %x0" : "+wa" (x)); - *p = x; -} - -void -arg_to_gpr (unsigned long long x, __uint128_t *p) -{ - /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ - __uint128_t y = x; - __asm__ (" # %0" : "+r" (y)); - *p = y; -} - -void -mem_to_gpr (unsigned long long *p, __uint128_t *q) -{ - /* ld gpr1_lo,addr; li gpr1_hi,0. */ - __uint128_t y = *p; - __asm__ (" # %0" : "+r" (y)); - *q = y; -} - -/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mlxrdx\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mxxpermdi\M}1 } } */
[gcc(refs/users/meissner/heads/work186-bugs)] PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
https://gcc.gnu.org/g:b5906edb9d2e7dcc3c1c70030133ef0a3f5fb5b3 commit b5906edb9d2e7dcc3c1c70030133ef0a3f5fb5b3 Author: Michael Meissner Date: Sun Nov 17 18:13:28 2024 -0500 PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode Previously GCC would zero extend a DImode GPR value to TImode by first zero extending the DImode value into a GPR TImode value, and then do a MTVSRDD to move this value to a VSX register. This patch does the move directly, since if the middle argument to MTVSRDD is 0, it does the zero extend. This patch also generates LXVRDX if the DImode value is in memory. Finally, if the DImode is already in a vector register, it does a XXSPLTIB and XXPERMDI to get the value into the bottom 64-bits of the register. I have built GCC with the patches in this patch set applied on both little and big endian PowerPC systems and there were no regressions. Can I apply this patch to GCC 15? 2024-11-17 Michael Meissner gcc/ * gcc/config/rs6000/rs6000.md (zero_extendditi2): New insn. gcc/testsuite/ * gcc.target/powerpc/pr108958.c: New test. 
Diff: --- gcc/config/rs6000/rs6000.md | 47 +++ gcc/testsuite/gcc.target/powerpc/pr108958.c | 58 + 2 files changed, 105 insertions(+) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d266f93ff2e4..e3ac69430f39 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,6 +1026,53 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) +(define_insn_and_split "zero_extendditi2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa,?&wa") + (zero_extend:TI +(match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z,wa")))] + "TARGET_DIRECT_MOVE_64BIT" + "@ + # + # + mtvsrdd %x0,0,%1 + lxvrdx %x0,%y1 + #" + "&& reload_completed + && (int_reg_operand (operands[0], TImode) + || (vsx_register_operand (operands[0], TImode) + && vsx_register_operand (operands[1], DImode)))" + [(set (match_dup 2) + (match_dup 3)) + (set (match_dup 4) + (match_dup 5))] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + if (int_reg_operand (op0, TImode)) +{ + operands[2] = gen_lowpart (DImode, op0); + operands[3] = op1; + operands[4] = gen_highpart (DImode, op0); + operands[5] = const0_rtx; +} + else +{ + int op0_r = reg_or_subregno (op0); + rtx op0_di = gen_rtx_REG (DImode, op0_r); + rtx op0_v2di = gen_rtx_REG (V2DImode, op0_r); + rtx lo = WORDS_BIG_ENDIAN ? op0_di : op1; + rtx hi = WORDS_BIG_ENDIAN ? 
op1 : op0_di; + + operands[2] = op0_v2di; + operands[3] = CONST0_RTX (V2DImode); + operands[4] = op0_v2di; + operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo); +} +} + [(set_attr "type" "*,load,mtvsr,vecload,vecperm") + (set_attr "length" "8,8,*,*,8") + (set_attr "isa" "*,*,*,p10,*")]) (define_insn "extendqi2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c new file mode 100644 index ..80155cff0b9e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ + +void +arg_to_vsx (unsigned long long x, __uint128_t *p) +{ + /* mtvsrdd vsx,0,gpr. */ + __uint128_t y = x; + __asm__ (" # %x0" : "+wa" (y)); + *p = y; +} + +void +mem_to_vsx (unsigned long long *p, __uint128_t *q) +{ + /* lxrdx vsx,0,ptr. */ + __uint128_t y = *p; + __asm__ (" # %x0" : "+wa" (y)); + *q = y; +} + + +void +vsx_to_vsx (double d, __uint128_t *p) +{ + /* xxspltib + xxpermdir. */ + unsigned long long ull = d; + __uint128_t x = ull; + __asm__ (" # %x0" : "+wa" (x)); + *p = x; +} + +void +arg_to_gpr (unsigned long long x, __uint128_t *p) +{ + /* mr gpr1_lo,gpr2; li gpr1_hi,0. */ + __uint128_t y = x; + __asm__ (" # %0" : "+r" (y)); + *p = y; +} + +void +mem_to_gpr (unsigned long long *p, __uint128_t *q) +{ + /* ld gpr1_lo,addr; li gpr1_hi,0. */ + __uint128_t y = *p; + __asm__ (" # %0" : "+r" (y)); + *q = y; +} + +/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlxrdx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M}1 } } */
[gcc(refs/users/meissner/heads/work186-bugs)] Update ChangeLog.*
https://gcc.gnu.org/g:81f1e1e30680bbc1996963569ee24e248f5b287d commit 81f1e1e30680bbc1996963569ee24e248f5b287d Author: Michael Meissner Date: Sat Nov 16 02:32:52 2024 -0500 Update ChangeLog.* Diff: --- gcc/ChangeLog.bugs | 64 ++ 1 file changed, 64 insertions(+) diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs index 3577ed4dfece..5f4a624cb75a 100644 --- a/gcc/ChangeLog.bugs +++ b/gcc/ChangeLog.bugs @@ -1,5 +1,69 @@ + Branch work186-bugs, patch #501 + +Add power9 and power10 float to logical optimizations. + +2024-11-16 Michael Meissner + +gcc/ + + PR target/117487 + * config/rs6000/vsx.md (SFmode logical peephoole): Update comments in + the original code that supports power8. Add a new define_peephole2 to + do the optimization on power9/power10. + + Branch work186-bugs, patch #500 + +PR 99293: Optimize splat of a V2DF/V2DI extract with constant element + +We had optimizations for splat of a vector extract for the other vector +types, but we missed having one for V2DI and V2DF. This patch adds a +combiner insn to do this optimization. + +In looking at the source, we had similar optimizations for V4SI and V4SF +extract and splats, but we missed doing V2DI/V2DF. + +Without the patch for the code: + + vector long long splat_dup_l_0 (vector long long v) + { + return __builtin_vec_splats (__builtin_vec_extract (v, 0)); + } + +the compiler generates (on a little endian power9): + + splat_dup_l_0: + mfvsrld 9,34 + mtvsrdd 34,9,9 + blr + +Now it generates: + + splat_dup_l_0: + xxpermdi 34,34,34,3 + blr + +2024-11-16 Michael Meissner + +gcc/ + + * config/rs6000/vsx.md (vsx_splat_extract_): New insn. + +gcc/testsuite/ + + * gcc.target/powerpc/builtins-1.c: Adjust insn count. + * gcc.target/powerpc/pr99293.c: New test. + Branch work186-bugs, baseline +Add ChangeLog.bugs and update REVISION. + +2024-11-14 Michael Meissner + +gcc/ + + * ChangeLog.bugs: New file for branch. + * REVISION: Update. + 2024-11-14 Michael Meissner Clone branch
[gcc r15-5364] libstdc++: Move a gcc.gnu.org link to https
https://gcc.gnu.org/g:696bd86260e268d5603fb227e276834ebd0e01b4 commit r15-5364-g696bd86260e268d5603fb227e276834ebd0e01b4 Author: Gerald Pfeifer Date: Sun Nov 17 12:36:29 2024 +0100 libstdc++: Move a gcc.gnu.org link to https libstdc++-v3: * doc/xml/manual/intro.xml: Move a gcc.gnu.org link to https. * doc/html/manual/license.html: Regenerate. Diff: --- libstdc++-v3/doc/html/manual/license.html | 2 +- libstdc++-v3/doc/xml/manual/intro.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/doc/html/manual/license.html b/libstdc++-v3/doc/html/manual/license.html index e10808a8908e..f6a9e6ce6dd6 100644 --- a/libstdc++-v3/doc/html/manual/license.html +++ b/libstdc++-v3/doc/html/manual/license.html @@ -4,7 +4,7 @@ and one for the documentation. There is a license section in the FAQ regarding common questions. If you have more - questions, ask the FSF or the http://gcc.gnu.org/lists.html"; target="_top">gcc mailing list. + questions, ask the FSF or the https://gcc.gnu.org/lists.html"; target="_top">gcc mailing list. The Code: GPL The source code is distributed under the GNU General Public License version 3, with the addition under section 7 of an exception described in diff --git a/libstdc++-v3/doc/xml/manual/intro.xml b/libstdc++-v3/doc/xml/manual/intro.xml index b940e9cfa909..ed0b90d202bd 100644 --- a/libstdc++-v3/doc/xml/manual/intro.xml +++ b/libstdc++-v3/doc/xml/manual/intro.xml @@ -75,7 +75,7 @@ There is a license section in the FAQ regarding common questions. If you have more - questions, ask the FSF or the http://www.w3.org/1999/xlink"; xlink:href="http://gcc.gnu.org/lists.html";>gcc mailing list. + questions, ask the FSF or the http://www.w3.org/1999/xlink"; xlink:href="https://gcc.gnu.org/lists.html";>gcc mailing list. The Code: GPL
[gcc r15-5365] Mark asm statements as necessary in ipa-fnsummary
https://gcc.gnu.org/g:bd59f2eeacd41b91e4e79b32dda83cc60d499e25 commit r15-5365-gbd59f2eeacd41b91e4e79b32dda83cc60d499e25 Author: Jan Hubicka Date: Sun Nov 17 15:48:29 2024 +0100 Mark asm statements as necessary in ipa-fnsummary I forgot to mark asm statements as necessary in ipa-fnsummary. This should mask failure of gcc.dg/guality/pr36728-2.c where the patch enabled cloning which breaks debug info. gcc/ChangeLog: * ipa-fnsummary.cc (find_necessary_statements): ASM statements are necessary. Diff: --- gcc/ipa-fnsummary.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc index 87e08dad8467..e6bdc0069694 100644 --- a/gcc/ipa-fnsummary.cc +++ b/gcc/ipa-fnsummary.cc @@ -2804,7 +2804,8 @@ find_necessary_statements (struct cgraph_node *node) || (is_ctrl_stmt (stmt) && (gimple_code (stmt) != GIMPLE_COND || !guards_builtin_unreachable (bb, cache))) - || gimple_store_p (stmt)) + || gimple_store_p (stmt) + || gimple_code (stmt) == GIMPLE_ASM) mark_stmt_necessary (stmt, worklist); } }
[gcc r15-5371] c++: regenerate opt urls
https://gcc.gnu.org/g:db348caef98b120262db91cab5f10713ce1570f3 commit r15-5371-gdb348caef98b120262db91cab5f10713ce1570f3 Author: Jason Merrill Date: Sun Nov 17 20:42:09 2024 +0100 c++: regenerate opt urls This should have been part of r15-5367. One day I'll remember to do this before buildbot sends me hate mail. gcc/c-family/ChangeLog: * c.opt.urls: Regenerate. Diff: --- gcc/c-family/c.opt.urls | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/c-family/c.opt.urls b/gcc/c-family/c.opt.urls index 91918c492047..060cde01dbca 100644 --- a/gcc/c-family/c.opt.urls +++ b/gcc/c-family/c.opt.urls @@ -1304,6 +1304,12 @@ UrlSuffix(gcc/Objective-C-and-Objective-C_002b_002b-Dialect-Options.html#index-f frtti UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-fno-rtti) LangUrlSuffix_D(gdc/Runtime-Options.html#index-fno-rtti) +fsearch-include-path +UrlSuffix(gcc/Preprocessor-Options.html#index-fsearch-include-path) + +fsearch-include-path= +UrlSuffix(gcc/Preprocessor-Options.html#index-fsearch-include-path) + ; skipping UrlSuffix for 'fshort-enums' due to multiple URLs: ; duplicate: 'gcc/Code-Gen-Options.html#index-fshort-enums' ; duplicate: 'gcc/Non-bugs.html#index-fshort-enums-3'