https://gcc.gnu.org/g:29cc92348e0d32b7d301eae42aaebd716736f037
commit r15-9248-g29cc92348e0d32b7d301eae42aaebd716736f037 Author: Georg-Johann Lay <a...@gjlay.de> Date: Mon Apr 7 12:39:41 2025 +0200 AVRrc: Tweak __[u]mulhisi3. When MUL is not available, then the __umulhisi3 and __mulhisi3 functions can use __mulsi3_helper. This improves code size, stack footprint and runtime on AVRrc. libgcc/ * config/avr/lib1funcs.S (__mulhisi3, __umulhisi3): Use __mulsi3_helper for better performance on AVRrc. Diff: --- libgcc/config/avr/lib1funcs.S | 71 ++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 49 deletions(-) diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S index 52ce051e00fe..dfe99b1ea06f 100644 --- a/libgcc/config/avr/lib1funcs.S +++ b/libgcc/config/avr/lib1funcs.S @@ -395,29 +395,23 @@ ENDF __mulhi3 #if defined (L_umulhisi3) DEFUN __umulhisi3 -#ifndef __AVR_TINY__ +#ifdef __AVR_TINY__ + ;; Save callee saved regs. + push B0 + push B1 +#endif /* AVR_TINY */ wmov B0, 24 ;; Zero-extend B clr B2 clr B3 ;; Zero-extend A wmov A2, B2 - XJMP __mulsi3 +#ifdef __AVR_TINY__ + ;; Clear hi16 of the result so we can use __mulsi3_helper. + wmov CC2, B2 + XJMP __mulsi3_helper #else - ;; Push zero-extended R24 - push __zero_reg__ - push __zero_reg__ - push r25 - push r24 - ;; Zero-extend R22 - clr R24 - clr R25 - XCALL __mulsi3 - pop __tmp_reg__ - pop __tmp_reg__ - pop __tmp_reg__ - pop __tmp_reg__ - ret + XJMP __mulsi3 #endif /* AVR_TINY? */ ENDF __umulhisi3 #endif /* L_umulhisi3 */ @@ -425,54 +419,33 @@ ENDF __umulhisi3 #if defined (L_mulhisi3) DEFUN __mulhisi3 #ifdef __AVR_TINY__ - ;; Push sign-extended R24 - mov __tmp_reg__, r25 - lsl __tmp_reg__ - sbc __tmp_reg__, __tmp_reg__ - push __tmp_reg__ - push __tmp_reg__ - push r25 - push r24 - ;; Sign-extend R22 - mov r24, r23 - lsl r24 - sbc r24, r24 - sbc r25, r25 - XCALL __mulsi3 - pop __tmp_reg__ - pop __tmp_reg__ - pop __tmp_reg__ - pop __tmp_reg__ - ret -#else + ;; Save callee saved regs.
+ push B0 + push B1 +#endif /* AVR_TINY */ wmov B0, 24 ;; Sign-extend B lsl r25 sbc B2, B2 mov B3, B2 -#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ - ;; Sign-extend A - clr A2 - sbrc A1, 7 - com A2 - mov A3, A2 - XJMP __mulsi3 -#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */ ;; Zero-extend A and __mulsi3 will run at least twice as fast ;; compared to a sign-extended A. clr A2 clr A3 + ;; Clear hi16 of the result so we can use __mulsi3_helper. + wmov CC2, A2 sbrs A1, 7 - XJMP __mulsi3 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + rjmp 1f +#else + XJMP __mulsi3_helper +#endif /* ERRATA_SKIP */ ;; If A < 0 then perform the B * 0xffff.... before the ;; very multiplication by initializing the high part of the ;; result CC with -B. - wmov CC2, A2 sub CC2, B0 sbc CC3, B1 - XJMP __mulsi3_helper -#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */ -#endif /* AVR_TINY? */ +1: XJMP __mulsi3_helper ENDF __mulhisi3 #endif /* L_mulhisi3 */