https://gcc.gnu.org/g:00385c803b0221ed36b44b286e79fd6a29c53be5

commit 00385c803b0221ed36b44b286e79fd6a29c53be5
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Tue Jun 3 19:09:01 2025 -0400

    PR target/108958 -- simplify mtvsrdd to zero extend GPR DImode to VSX TImode
    
    Previously GCC would zero extend a DImode GPR value to TImode by first zero
    extending the DImode value into a GPR TImode register pair, and then doing an
    MTVSRDD to move this value to a VSX register.
    
    This patch creates a peephole2 to catch this case, and it eliminates creating
    the TImode variable.  Instead it just does the MTVSRDD instruction directly.
    
    I have built GCC with the patches in this patch set applied on both little
    and big endian PowerPC systems and there were no regressions.  Can I apply
    this patch to GCC 16?
    
    2025-06-03  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            PR target/108958
            * config/rs6000/rs6000.md (UNSPEC_ZERO_EXTEND): New unspec.
            (zero_extendsiti2 peephole2): Add a peephole2 to simplify zero
            extend between DImode value in a GPR to a TImode target in a
            vector register.
            (zero_extendsiti2_vsx): New insn.
    
    gcc/testsuite/
    
            PR target/108958
            * gcc.target/powerpc/pr108958.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000.md                 | 26 ++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr108958.c | 47 +++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c65d564f5142..0674ab922095 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -173,6 +173,7 @@
    UNSPEC_XXSPLTIW_CONST
    UNSPEC_FMAX
    UNSPEC_FMIN
+   UNSPEC_ZERO_EXTEND
   ])
 
 ;;
@@ -969,6 +970,31 @@
    (set_attr "dot" "yes")
    (set_attr "length" "4,8")])
 
+;; Optimize a DImode GPR zero extended into a TImode GPR pair that is then moved
+;; to a VSX register; replace all three insns with one UNSPEC_ZERO_EXTEND set.
+(define_peephole2
+  [(set (match_operand:DI 0 "int_reg_operand")  ; copy op 1 into one half of op 4
+       (match_operand:DI 1 "int_reg_operand"))
+   (set (match_operand:DI 2 "int_reg_operand")  ; zero the other half of op 4
+       (const_int 0))
+   (set (match_operand:TI 3 "vsx_register_operand")  ; move the GPR pair to VSX
+       (match_operand:TI 4 "int_reg_operand"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && (reg_or_subregno (operands[0])
+       == reg_or_subregno (operands[4]) + !!WORDS_BIG_ENDIAN)
+   && (reg_or_subregno (operands[2])
+       == reg_or_subregno (operands[4]) + !WORDS_BIG_ENDIAN)
+   && peep2_reg_dead_p (3, operands[4])"  ; ops 0/2 are op 4's halves; op 4 dead
+  [(set (match_dup 3)  ; op 3 = zero extended op 1, straight from the source GPR
+       (unspec:TI [(match_dup 1)] UNSPEC_ZERO_EXTEND))])
+
+(define_insn "*zero_extendsiti2_vsx"  ; NB: source is DImode despite the "si"
+  [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
+       (unspec:TI [(match_operand:DI 1 "int_reg_operand" "r")]
+                  UNSPEC_ZERO_EXTEND))]
+  "TARGET_DIRECT_MOVE_64BIT"
+  "mtvsrdd %x0,0,%1"  ; the literal 0 operand stands in for the zeroed GPR half
+  [(set_attr "type" "mtvsr")])
 
 (define_insn "zero_extendsi<mode>2"
   [(set (match_operand:EXTSI 0 "gpc_reg_operand" "=r,r,d,wa,wa,r,wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c
new file mode 100644
index 000000000000..21b3f2766918
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+#ifndef TYPE
+#define TYPE unsigned long long	/* Default source type if TYPE is not predefined.  */
+#endif
+
+/* PR target/108958, when zero extending a DImode to a TImode, and the TImode variable is in a VSX register, generate:
+
+       mtvsrdd vreg,0,gpr
+
+   instead of:
+
+       mr tmp,gpr
+       li tmp+1,0
+       mtvsrdd vreg,tmp+1,tmp.  */
+
+void
+gpr_to_vsx (TYPE x, __uint128_t *p)
+{
+  /* mtvsrdd 0,0,3
+     stxv 0,0(4)  */
+
+  __uint128_t y = x;
+  __asm__ (" # %x0" : "+wa" (y));	/* "+wa" asks for y in a VSX register.  */
+  *p = y;
+}
+
+void
+gpr_to_gpr (TYPE x, __uint128_t *p)
+{
+  /* mr 2,3
+     li 3,0
+     std 2,0(4)
+     std 3,8(4)  */
+
+  __uint128_t y = x;
+  __asm__ (" # %0" : "+r" (y));	/* "+r" keeps y in GPRs; no mtvsrdd expected.  */
+  *p = y;
+}
+
+/* { dg-final { scan-assembler-times {\mli\M}              1 } } */
+/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstd\M}             2 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M}            1 } } */

Reply via email to