https://gcc.gnu.org/g:a1c592be501b12942181391cb6a7e3cca54e4e45

commit r16-1717-ga1c592be501b12942181391cb6a7e3cca54e4e45
Author: Dimitar Dimitrov <dimi...@dinux.eu>
Date:   Sun Feb 9 17:55:03 2025 +0200

    pru: Split 64-bit moves into a sequence of 32-bit moves
    
    The 64-bit register-to-register moves on PRU are implemented with two
    instructions moving 32-bit registers.  Defining a split for the 64-bit
    moves allows this to be described in RTL, and thus allows one of the
    32-bit moves to be eliminated if its destination register is dead.
    
    Also, split the loading of non-trivial 64-bit integer constants.  The
    resulting 32-bit integer constants have a better chance to be loaded
    with something more optimal than an "ldi32".
    
    For now do the splits only after register allocation, because LRA does
    not yet efficiently handle subregs.  See
    https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651366.html
    
    This patch shows a slight improvement for the wikisort benchmark from
    embench-iot:
    
    Benchmark          size-before  size-after  difference
    ---------          -----------  ----------  ----------
    aha-mont64          1,648       1,648       0
    crc32                 104       104         0
    depthconv           1,172       1,172       0
    edn                 3,040       3,040       0
    huffbench           1,616       1,616       0
    matmult-int           748       748         0
    md5sum                700       700         0
    nettle-aes          2,664       2,664       0
    nettle-sha256       5,732       5,732       0
    nsichneu           21,372       21,372      0
    picojpeg            9,716       9,716       0
    qrduino             8,556       8,556       0
    sglib-combined      3,724       3,724       0
    slre                3,488       3,488       0
    statemate           1,132       1,132       0
    tarfind               652       652         0
    ud                  1,004       1,004       0
    wikisort           18,120       18,092      -28
    xgboost               300       300         0
    
    gcc/ChangeLog:
    
            * config/pru/pru.md (reg move splitter): New splitter for 64-bit
            register moves into two 32-bit moves.
            (const_int move splitter): New splitter for 64-bit constant
            integer moves into two 32-bit moves.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/pru/mov64-subreg-1.c: New test.
            * gcc.target/pru/mov64-subreg-2.c: New test.
    
    Signed-off-by: Dimitar Dimitrov <dimi...@dinux.eu>

Diff:
---
 gcc/config/pru/pru.md                         | 77 +++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/pru/mov64-subreg-1.c |  9 ++++
 gcc/testsuite/gcc.target/pru/mov64-subreg-2.c |  8 +++
 3 files changed, 94 insertions(+)

diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
index fcd310613f50..3504e42e9002 100644
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -283,6 +283,83 @@
   [(set_attr "type" "st,ld,alu,alu,alu,alu,alu,alu")
    (set_attr "length" "4,4,4,4,8,8,8,16")])
 
+; Break 64-bit register-to-register moves into 32-bit moves.
+; If only one 32-bit subreg of the destination is used, this split allows
+; the move into the other 32-bit subreg of the DI register to be eliminated.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (match_operand:DI 1 "register_operand"))]
+  "
+   /* TODO - LRA does not yet handle subregs efficiently.
+      So it is profitable to split only after register allocation is
+      complete.
+      Once https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651366.html
+      is merged, this condition should be removed to allow splitting
+      before LRA.  */
+   reload_completed
+   /* Sign-extended paradoxical registers require expansion
+      of the proper pattern.  We can do only zero extension here.  */
+   && (SUBREG_P (operands[1]) && paradoxical_subreg_p (operands[1])
+       ? SUBREG_PROMOTED_VAR_P (operands[1])
+         && SUBREG_PROMOTED_UNSIGNED_P (operands[1]) > 0
+       : true)"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (match_dup 3))]
+  "
+  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+
+  if (SUBREG_P (operands[1]) && paradoxical_subreg_p (operands[1]))
+    { /* Zero-extend: copy the low word, then clear the high word.  */
+      gcc_assert (SUBREG_PROMOTED_VAR_P (operands[1]));
+      gcc_assert (SUBREG_PROMOTED_UNSIGNED_P (operands[1]) > 0);
+
+      operands[0] = dst_lo;
+      operands[1] = src_lo;
+      operands[2] = dst_hi;
+      operands[3] = const0_rtx;
+    }
+  else if (!reg_overlap_mentioned_p (dst_lo, src_hi))
+    { /* No overlap of dst_lo with src_hi: move the low word first.  */
+      operands[0] = dst_lo;
+      operands[1] = src_lo;
+      operands[2] = dst_hi;
+      operands[3] = src_hi;
+    }
+  else
+    { /* dst_lo overlaps src_hi: move the high word first.  */
+      operands[0] = dst_hi;
+      operands[1] = src_hi;
+      operands[2] = dst_lo;
+      operands[3] = src_lo;
+    }
+  "
+)
+
+; Break loading of non-trivial 64-bit constant integers.  The split
+; will not generate a better code sequence, but at least it allows
+; dropping a non-live 32-bit part of the destination, or better
+; constant propagation.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (match_operand:DI 1 "const_int_operand"))]
+  "reload_completed
+   && !satisfies_constraint_Z (operands[1])
+   && !satisfies_constraint_Um (operands[1])
+   && !satisfies_constraint_T (operands[1])"
+  ; The high halves are derived before operands 0 and 1 get overwritten.
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (match_dup 3))]
+  "
+  operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  operands[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+  operands[0] = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  operands[1] = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  "
+)
+
 ;
 ; load_multiple pattern(s).
 ;
diff --git a/gcc/testsuite/gcc.target/pru/mov64-subreg-1.c b/gcc/testsuite/gcc.target/pru/mov64-subreg-1.c
new file mode 100644
index 000000000000..9b60aa033f15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/mov64-subreg-1.c
@@ -0,0 +1,9 @@
+/* { dg-do assemble } */
+/* { dg-options "-Os" } */
+/* { dg-final { object-size text == 8 } } */
+/* Check text size when a 64-bit argument is narrowed to unsigned.  */
+
+unsigned test(char a, unsigned long long b)
+{
+        return b;
+}
diff --git a/gcc/testsuite/gcc.target/pru/mov64-subreg-2.c b/gcc/testsuite/gcc.target/pru/mov64-subreg-2.c
new file mode 100644
index 000000000000..146cf9456087
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/mov64-subreg-2.c
@@ -0,0 +1,8 @@
+/* { dg-do assemble } */
+/* { dg-options "-Os" } */
+/* { dg-final { object-size text == 12 } } */
+/* Check text size for a 64-bit constant whose low 32 bits are zero.  */
+unsigned long long test(void)
+{
+       return 0xffffffff00000000UL;
+}

Reply via email to