https://gcc.gnu.org/g:00385c803b0221ed36b44b286e79fd6a29c53be5
commit 00385c803b0221ed36b44b286e79fd6a29c53be5 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Tue Jun 3 19:09:01 2025 -0400 PR target/108958 -- simplify mtvsrdd to zero extend GPR DImode to VSX TImode Previously GCC would zero extend a DImode GPR value to TImode by first zero extending the DImode value into a GPR TImode register pair, and then doing an MTVSRDD to move this value to a VSX register. This patch creates a peephole2 to catch this case, and it eliminates the creation of the intermediate TImode variable. Instead, it just emits the MTVSRDD instruction directly. I have built GCC with the patches in this patch set applied on both little and big endian PowerPC systems and there were no regressions. Can I apply this patch to GCC 16? 2025-06-03 Michael Meissner <meiss...@linux.ibm.com> gcc/ PR target/108958 * config/rs6000/rs6000.md (UNSPEC_ZERO_EXTEND): New unspec. (zero_extendsiti2 peephole2): Add a peephole2 to simplify zero extension from a DImode value in a GPR to a TImode target in a vector register. (zero_extendsiti2_vsx): New insn. gcc/testsuite/ PR target/108958 * gcc.target/powerpc/pr108958.c: New test. 
Diff: --- gcc/config/rs6000/rs6000.md | 26 ++++++++++++++++ gcc/testsuite/gcc.target/powerpc/pr108958.c | 47 +++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index c65d564f5142..0674ab922095 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -173,6 +173,7 @@ UNSPEC_XXSPLTIW_CONST UNSPEC_FMAX UNSPEC_FMIN + UNSPEC_ZERO_EXTEND ]) ;; @@ -969,6 +970,31 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) +;; Optimize zero_extendsiti2 from a GPR to a GPR and then moving the GPR to a +;; VSX register +(define_peephole2 + [(set (match_operand:DI 0 "int_reg_operand") + (match_operand:DI 1 "int_reg_operand")) + (set (match_operand:DI 2 "int_reg_operand") + (const_int 0)) + (set (match_operand:TI 3 "vsx_register_operand") + (match_operand:TI 4 "int_reg_operand"))] + "TARGET_DIRECT_MOVE_64BIT + && (reg_or_subregno (operands[0]) + == reg_or_subregno (operands[4]) + !!WORDS_BIG_ENDIAN) + && (reg_or_subregno (operands[2]) + == reg_or_subregno (operands[4]) + !WORDS_BIG_ENDIAN) + && peep2_reg_dead_p (3, operands[4])" + [(set (match_dup 3) + (unspec:TI [(match_dup 1)] UNSPEC_ZERO_EXTEND))]) + +(define_insn "*zero_extendsiti2_vsx" + [(set (match_operand:TI 0 "vsx_register_operand" "=wa") + (unspec:TI [(match_operand:DI 1 "int_reg_operand" "r")] + UNSPEC_ZERO_EXTEND))] + "TARGET_DIRECT_MOVE_64BIT" + "mtvsrdd %x0,0,%1" + [(set_attr "type" "mtvsr")]) (define_insn "zero_extendsi<mode>2" [(set (match_operand:EXTSI 0 "gpc_reg_operand" "=r,r,d,wa,wa,r,wa") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c new file mode 100644 index 000000000000..21b3f2766918 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ + +#ifndef TYPE +#define 
TYPE unsigned long long +#endif + +/* PR target/108958, when zero extending a DImode to a TImode, and the TImode variable is in a VSX register, generate: + + mtvsrdd vreg,0,gpr + + instead of: + + mr tmp,gpr + li tmp+1,0 + mtvsrdd vreg,tmp+1,tmp. */ + +void +gpr_to_vsx (TYPE x, __uint128_t *p) +{ + /* mtvsrdd 0,0,3 + stxv 0,0(4) */ + + __uint128_t y = x; + __asm__ (" # %x0" : "+wa" (y)); + *p = y; +} + +void +gpr_to_gpr (TYPE x, __uint128_t *p) +{ + /* mr 2,3 + li 3,0 + std 2,0(4) + std 3,8(4) */ + + __uint128_t y = x; + __asm__ (" # %0" : "+r" (y)); + *p = y; +} + +/* { dg-final { scan-assembler-times {\mli\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */