https://gcc.gnu.org/g:29cc92348e0d32b7d301eae42aaebd716736f037

commit r15-9248-g29cc92348e0d32b7d301eae42aaebd716736f037
Author: Georg-Johann Lay <a...@gjlay.de>
Date:   Mon Apr 7 12:39:41 2025 +0200

    AVRrc: Tweak __[u]mulhisi3.
    
    When MUL is not available, the __umulhisi3 and __mulhisi3
    functions can use __mulsi3_helper.  This improves code size,
    stack footprint and runtime on AVRrc.
    
    libgcc/
            * config/avr/lib1funcs.S (__mulhisi3, __umulhisi3): Use
            __mulsi3_helper for better performance on AVRrc.

Diff:
---
 libgcc/config/avr/lib1funcs.S | 71 ++++++++++++++-----------------------------
 1 file changed, 22 insertions(+), 49 deletions(-)

diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S
index 52ce051e00fe..dfe99b1ea06f 100644
--- a/libgcc/config/avr/lib1funcs.S
+++ b/libgcc/config/avr/lib1funcs.S
@@ -395,29 +395,23 @@ ENDF  __mulhi3
 
 #if defined (L_umulhisi3)
 DEFUN __umulhisi3
-#ifndef __AVR_TINY__
+#ifdef __AVR_TINY__
+    ;; Save callee saved regs.
+    push    B0
+    push    B1
+#endif /* AVR_TINY */
     wmov    B0, 24
     ;; Zero-extend B
     clr     B2
     clr     B3
     ;; Zero-extend A
     wmov    A2, B2
-    XJMP    __mulsi3
+#ifdef __AVR_TINY__
+    ;; Clear hi16 of the result so we can use __mulsi3_helper.
+    wmov    CC2, B2
+    XJMP    __mulsi3_helper
 #else
-    ;; Push zero-extended R24
-    push    __zero_reg__
-    push    __zero_reg__
-    push    r25
-    push    r24
-    ;; Zero-extend R22
-    clr     R24
-    clr     R25
-    XCALL   __mulsi3
-    pop     __tmp_reg__
-    pop     __tmp_reg__
-    pop     __tmp_reg__
-    pop     __tmp_reg__
-    ret
+    XJMP    __mulsi3
 #endif /* AVR_TINY? */
 ENDF __umulhisi3
 #endif /* L_umulhisi3 */
@@ -425,54 +419,33 @@ ENDF __umulhisi3
 #if defined (L_mulhisi3)
 DEFUN __mulhisi3
 #ifdef __AVR_TINY__
-    ;; Push sign-extended R24
-    mov     __tmp_reg__, r25
-    lsl     __tmp_reg__
-    sbc     __tmp_reg__, __tmp_reg__
-    push    __tmp_reg__
-    push    __tmp_reg__
-    push    r25
-    push    r24
-    ;;  Sign-extend R22
-    mov     r24, r23
-    lsl     r24
-    sbc     r24, r24
-    sbc     r25, r25
-    XCALL   __mulsi3
-    pop     __tmp_reg__
-    pop     __tmp_reg__
-    pop     __tmp_reg__
-    pop     __tmp_reg__
-    ret
-#else
+    ;; Save callee saved regs.
+    push    B0
+    push    B1
+#endif /* AVR_TINY */
     wmov    B0, 24
     ;; Sign-extend B
     lsl     r25
     sbc     B2, B2
     mov     B3, B2
-#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
-    ;; Sign-extend A
-    clr     A2
-    sbrc    A1, 7
-    com     A2
-    mov     A3, A2
-    XJMP    __mulsi3
-#else /*  no __AVR_ERRATA_SKIP_JMP_CALL__ */
     ;; Zero-extend A and __mulsi3 will run at least twice as fast
     ;; compared to a sign-extended A.
     clr     A2
     clr     A3
+    ;; Clear hi16 of the result so we can use __mulsi3_helper.
+    wmov    CC2, A2
     sbrs    A1, 7
-    XJMP __mulsi3
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+    rjmp 1f
+#else
+    XJMP    __mulsi3_helper
+#endif /* ERRATA_SKIP */
     ;; If  A < 0  then perform the  B * 0xffff.... before the
     ;; very multiplication by initializing the high part of the
     ;; result CC with -B.
-    wmov    CC2, A2
     sub     CC2, B0
     sbc     CC3, B1
-    XJMP __mulsi3_helper
-#endif /*  __AVR_ERRATA_SKIP_JMP_CALL__ */
-#endif /* AVR_TINY? */
+1:  XJMP    __mulsi3_helper
 ENDF __mulhisi3
 #endif /* L_mulhisi3 */

Reply via email to