https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122505

--- Comment #1 from GCC Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Georg-Johann Lay <[email protected]>:

https://gcc.gnu.org/g:ad8de026441c3e57c4761b1c595ace92ed21c254

commit r16-4929-gad8de026441c3e57c4761b1c595ace92ed21c254
Author: Georg-Johann Lay <[email protected]>
Date:   Sat Nov 1 17:33:13 2025 +0100

    AVR: PR122505 - Fix bloated mulpsi3 in the wake of hacking around PR118012.

    Since the PR118012 work-around patch, there is an SImode insn also for
    the non-MUL case, but there is no mulpsi3.  This makes the middle-end
    use the mulsi3 insn for 24-bit multiplications like in:

    __uint24 mul24 (__uint24 a, __uint24 b)
    {
        return a * b;
    }

    which will be compiled to:

    mul24:
        push r28         ;  34  [c=4 l=1]  pushqi1/0
        push r29         ;  35  [c=4 l=1]  pushqi1/0
        in r28,__SP_L__  ;  47  [c=4 l=2]  *movhi/7
        in r29,__SP_H__
        sbiw r28,8   ;  48  [c=8 l=1]  *addhi3/2
        in __tmp_reg__,__SREG__  ;  38  [c=8 l=5]  movhi_sp_r/2
        cli
        out __SP_H__,r29
        out __SREG__,__tmp_reg__
        out __SP_L__,r28
    /* prologue: function */
    /* frame size = 8 */
    /* stack size = 10 */
        std Y+1,r22  ;  49  [c=4 l=1]  movqi_insn/2
        std Y+2,r23  ;  50  [c=4 l=1]  movqi_insn/2
        std Y+3,r24  ;  51  [c=4 l=1]  movqi_insn/2
        std Y+5,r18  ;  52  [c=4 l=1]  movqi_insn/2
        std Y+6,r19  ;  53  [c=4 l=1]  movqi_insn/2
        std Y+7,r20  ;  54  [c=4 l=1]  movqi_insn/2
        ldd r18,Y+1  ;  55  [c=4 l=1]  movqi_insn/3
        ldd r19,Y+2  ;  56  [c=4 l=1]  movqi_insn/3
        ldd r20,Y+3  ;  57  [c=4 l=1]  movqi_insn/3
        ldd r21,Y+4  ;  58  [c=4 l=1]  movqi_insn/3
        ldd r22,Y+5  ;  59  [c=4 l=1]  movqi_insn/3
        ldd r23,Y+6  ;  60  [c=4 l=1]  movqi_insn/3
        ldd r24,Y+7  ;  61  [c=4 l=1]  movqi_insn/3
        ldd r25,Y+8  ;  62  [c=4 l=1]  movqi_insn/3
        call __mulsi3    ;  33  [c=20 l=2]  *mulsi3_call_pr118012
    /* epilogue start */
        adiw r28,8   ;  63  [c=8 l=1]  *addhi3/2
        in __tmp_reg__,__SREG__  ;  42  [c=8 l=5]  movhi_sp_r/2
        cli
        out __SP_H__,r29
        out __SREG__,__tmp_reg__
        out __SP_L__,r28
        pop r29      ;  43  [c=4 l=1]  popqi
        pop r28      ;  44  [c=4 l=1]  popqi
        ret

    where the expected code is simply:

    mul24:
        call __mulpsi3   ;  9   [c=20 l=2]  call_value_insn/1
    /* epilogue start */
        ret      ;  24  [c=0 l=1]  return

    The patch just allows the mulpsi3 insn for the non-MUL case, except for
    AVR_TINY, which passes the second argument on the stack, so no insn can be used.

    The change might be beneficial even in the absence of PR118012 because
    the __mulpsi3 footprint is leaner than a libcall.

            PR tree-optimization/118012
            PR tree-optimization/122505
    gcc/
            * config/avr/avr.md (mulpsi3): Also allow the insn condition
            in the case where avropt_pr118012 && !AVR_TINY.
            (*mulpsi3): Handle split for the !AVR_HAVE_MUL case.
            (*mulpsi3-nomul.libgcc_split, *mulpsi3-nomul.libgcc): New insns.

Reply via email to