https://gcc.gnu.org/g:0a1ae1fb1364e066499c888790c63b06872b622e
commit 0a1ae1fb1364e066499c888790c63b06872b622e Author: Michael Meissner <meiss...@linux.ibm.com> Date: Sun Nov 17 23:27:17 2024 -0500 PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode Previously GCC would zero extend a DImode GPR value to TImode by first zero extending the DImode value into a GPR TImode value, and then using MTVSRDD to move this value to a VSX register. This patch does the move directly, since if the middle argument to MTVSRDD is 0, it does the zero extend. This patch also generates LXVRDX if the DImode value is in memory. Finally, if the DImode value is already in a vector register, it does a XXSPLTIB and XXPERMDI to get the value into the bottom 64 bits of the register. I have built GCC with the patches in this patch set applied on both little and big endian PowerPC systems and there were no regressions. Can I apply this patch to GCC 15? 2024-11-17 Michael Meissner <meiss...@linux.ibm.com> gcc/ PR target/108958 * config/rs6000/rs6000.md (zero_extendditi2): New insn. gcc/testsuite/ PR target/108958 * gcc.target/powerpc/pr108958.c: New test. 
Diff: --- gcc/config/rs6000/rs6000.md | 46 ++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/pr108958.c | 73 +++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d266f93ff2e4..bfb02b07ef4e 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1026,6 +1026,52 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) +(define_insn_and_split "zero_extendditi2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=r,wa,&wa") + (zero_extend:TI + (match_operand:DI 1 "gpc_reg_operand" "rwa,r,wa")))] + "TARGET_P9_VECTOR && TARGET_POWERPC64" + "@ + # + mtvsrdd %x0,0,%1 + #" + "&& reload_completed + && (int_reg_operand (operands[0], TImode) + || vsx_register_operand (operands[1], DImode))" + [(set (match_dup 2) + (match_dup 3)) + (set (match_dup 4) + (match_dup 5))] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int r = reg_or_subregno (op0); + + if (int_reg_operand (op0, TImode)) + { + int lo = BYTES_BIG_ENDIAN ? 1 : 0; + int hi = 1 - lo; + + operands[2] = gen_rtx_REG (DImode, r + lo); + operands[3] = op1; + operands[4] = gen_rtx_REG (DImode, r + hi); + operands[5] = const0_rtx; + } + else + { + rtx op0_di = gen_rtx_REG (DImode, r); + rtx op0_v2di = gen_rtx_REG (V2DImode, r); + rtx lo = WORDS_BIG_ENDIAN ? op1 : op0_di; + rtx hi = WORDS_BIG_ENDIAN ? 
op0_di : op1; + + operands[2] = op0_v2di; + operands[3] = CONST0_RTX (V2DImode); + operands[4] = op0_v2di; + operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo); + } +} + [(set_attr "type" "*,mtvsr,vecperm") + (set_attr "length" "8,*,8")]) (define_insn "extendqi<mode>2" [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v") diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c new file mode 100644 index 000000000000..52a969507cb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c @@ -0,0 +1,73 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ + +/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register. */ + +union u { + double d; + unsigned long long u64; +}; + +void +gpr_to_vsx (unsigned long long x, __uint128_t *p) +{ + /* mtvsrdd vsx,0,gpr. */ + __uint128_t y = x; + __asm__ (" # %x0" : "+wa" (y)); + *p = y; +} + +void +vsx_to_vsx (double d, __uint128_t *p) +{ + unsigned long long x; + __uint128_t y; + union u u2; + + u2.d = d; + x = u2.u64; + + __asm__ (" # %x0" : "+wa" (x)); + + /* xxspltib and xxpermdi. */ + y = x; + __asm__ (" # %x0" : "+wa" (y)); + + *p = y; +} + +void +gpr_to_gpr (unsigned long long x, __uint128_t *p) +{ + /* mr and li. */ + __uint128_t y = x; + __asm__ (" # %0" : "+r" (y)); + *p = y; +} + +void +vsx_to_gpr (double d, __uint128_t *p) +{ + unsigned long long x; + __uint128_t y; + union u u2; + + u2.d = d; + x = u2.u64; + + __asm__ (" # %x0" : "+wa" (x)); + + /* mfvsrd and li. */ + y = x; + __asm__ (" # %0" : "+r" (y)); + + *p = y; +} + +/* { dg-final { scan-assembler-times {\mli\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mmfvsrd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */