[PATCH] rl78 subdi3 improvement

Sebastian Perta Fri, 20 Oct 2017 06:00:01 -0700

Hello,

The following patch improves both the speed and code size for 64 bit 
subtraction for RL78:
it emits a library function call instead of emitting inline code for every 
single 64 bit subtraction.
The subtraction function which was added in libgcc is hand written, so more 
optimal than what GCC generates.


The change can easily be seen on the following test case.
long long my_subdi3(long long a, long long b) {
return a - b;
}
I did not add this to the regression suite as it is very simple and there are 
many test cases in the regression suite which already test this, for example 
gcc.c-torture/execute/20041011-1.c and gcc.c-torture/execute/arith-rand-ll.c 
and so on.

Regression test is OK, tested with the following command:
make -k check-gcc RUNTESTFLAGS=--target_board=rl78-sim

Please let me know if this is OK, Thank you!
Sebastian

Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog(revision 253893)
+++ gcc/ChangeLog(working copy)
@@ -1,3 +1,7 @@
+2017-10-13  Sebastian Perta  <sebastian.pe...@renesas.com>
+
+* config/rl78/rl78.md: New define_expand "subdi3".
+
 2017-10-19  Eric Botcazou  <ebotca...@adacore.com>

 PR debug/82509
Index: gcc/config/rl78/rl78.md
===================================================================
--- gcc/config/rl78/rl78.md(revision 253893)
+++ gcc/config/rl78/rl78.md(working copy)
@@ -268,6 +268,16 @@
   DONE;"
 )

+(define_expand "subdi3"  ;; DImode subtract, expanded as a call to __subdi3
+ [(set (match_operand:DI          0 "nonimmediate_operand" "")  ;; result
+    (minus:DI (match_operand:DI 1 "general_operand"      "")  ;; minuend
+         (match_operand:DI    2 "general_operand"      "")))  ;; subtrahend
+   ]
+  ""  ;; empty condition string
+  "rl78_emit_libcall (\"__subdi3\", MINUS, DImode, DImode, 3, operands);
+   DONE;"
+)
+
 (define_insn "subsi3_internal_virt"
   [(set (match_operand:SI           0 "nonimmediate_operand" "=v,&vm, vm")
 (minus:SI (match_operand:SI 1 "general_operand"      "0, vim, vim")
Index: libgcc/ChangeLog
===================================================================
--- libgcc/ChangeLog(revision 253893)
+++ libgcc/ChangeLog(working copy)
@@ -1,5 +1,10 @@
 2017-10-13  Sebastian Perta  <sebastian.pe...@renesas.com>

+* config/rl78/subdi3.S: New assembly file.
+* config/rl78/t-rl78: Added subdi3.S to LIB2ADD.
+
+2017-10-13  Sebastian Perta  <sebastian.pe...@renesas.com>
+
 * config/rl78/adddi3.S: New assembly file.
 * config/rl78/t-rl78: Added adddi3.S to LIB2ADD.

Index: libgcc/config/rl78/t-rl78
===================================================================
--- libgcc/config/rl78/t-rl78(revision 253893)
+++ libgcc/config/rl78/t-rl78(working copy)
@@ -31,7 +31,8 @@
 $(srcdir)/config/rl78/fpbit-sf.S \
 $(srcdir)/config/rl78/fpmath-sf.S \
 $(srcdir)/config/rl78/cmpsi2.S \
-$(srcdir)/config/rl78/adddi3.S
+$(srcdir)/config/rl78/adddi3.S \
+$(srcdir)/config/rl78/subdi3.S

 LIB2FUNCS_EXCLUDE = _clzhi2 _clzsi2 _ctzhi2 _ctzsi2 \
   _popcounthi2 _popcountsi2 \
Index: libgcc/config/rl78/subdi3.S
===================================================================
--- libgcc/config/rl78/subdi3.S(nonexistent)
+++ libgcc/config/rl78/subdi3.S(working copy)
@@ -0,0 +1,58 @@
+;   Copyright (C) 2017 Free Software Foundation, Inc.
+;   Contributed by Sebastian Perta.
+;
+; This file is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published by the
+; Free Software Foundation; either version 3, or (at your option) any
+; later version.
+;
+; This file is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; General Public License for more details.
+;
+; Under Section 7 of GPL version 3, you are granted additional
+; permissions described in the GCC Runtime Library Exception, version
+; 3.1, as published by the Free Software Foundation.
+;
+; You should have received a copy of the GNU General Public License and
+; a copy of the GCC Runtime Library Exception along with this program;
+; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+; <http://www.gnu.org/licenses/>.
+
+
+#include "vregs.h"
+
+    .text
+
+START_FUNC ___subdi3
+    ; r8..r15 = [sp+4..sp+11] - [sp+12..sp+19], 64-bit, low byte first
+    movw  hl, sp       ; use HL-based addressing (allows for direct subw)
+
+    movw  ax, [hl+4]   ; bits 0-15: plain 16-bit subtract, no borrow in
+    subw  ax, [hl+12]
+    movw  r8, ax
+
+    mov   a, [hl+6]    ; middle bytes of the result use 8-bit SUBC insns,
+    subc  a, [hl+14]   ; which both account for and update the carry bit
+    mov   r10, a       ; (no SUBWC instruction is available)
+    mov   a, [hl+7]
+    subc  a, [hl+15]
+    mov   r11, a
+
+    mov   a, [hl+8]    ; bits 32-47, again one byte at a time
+    subc  a, [hl+16]
+    mov   r12, a
+    mov   a, [hl+9]
+    subc  a, [hl+17]
+    mov   r13, a
+
+    movw  ax, [hl+10]  ; bits 48-63: back to a 16-bit subtract
+    sknc               ; account for the possible carry from the
+    decw  ax           ; latest 8-bit operation
+    subw  ax, [hl+18]
+    movw  r14, ax      ; top word, nothing above it needs the borrow
+
+    ret
+
+END_FUNC ___subdi3
+



Renesas Electronics Europe Ltd, Dukes Meadow, Millboard Road, Bourne End, 
Buckinghamshire, SL8 5FH, UK. Registered in England & Wales under Registered 
No. 04586709.

[PATCH] rl78 subdi3 improvement

Reply via email to