[Bug rtl-optimization/90255] [9 regression] r266385 caused code size regressions on Arm, thumb and thumb2

rearnsha at gcc dot gnu.org Thu, 25 Apr 2019 10:34:55 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90255


Richard Earnshaw <rearnsha at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |ramana.radhakrishnan at arm dot com,
                   |                            |vmakarov at redhat dot com,
                   |                            |wdijkstr at arm dot com

--- Comment #1 from Richard Earnshaw <rearnsha at gcc dot gnu.org> ---
[committed too early]

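It looks like a 64-bit constant 0 is kept live in a register pair (r8/r9)
across the function calls in the loop, when the code could just initialize the
argument registers (r2/r3) directly with the immediate value at the point of use.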

Code before commit:
main:
        @ Function supports interworking.
        @ args = 0, pretend = 0, frame = 24
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {r4, r5, r6, r7, r8, r9, r10, lr}  // 8 registers saved.
        ldr     r3, [r1]
        ldr     r5, .L14
        cmp     r0, #4
        mov     r4, r1
        str     r3, [r5]
        sub     sp, sp, #48   // 48 bytes stack space
        blne    usage
.L2:
        ldmib   r4, {r6, r8}
        ldr     r1, .L14+4
        mov     r0, r6
        add     r2, sp, #28
        ldr     r7, [r4, #12]
        bl      sscanf
        cmp     r0, #1
        beq     .L3
        ldr     r3, .L14+8
        ldr     r0, [r3]
        mov     r3, r6
        ldr     r2, [r5]
        ldr     r1, .L14+12
.L12:
        ldr     r0, [r0, #8]
        bl      fprintf
        mov     r0, #1
.L13:
        bl      exit
.L3:
        mov     r0, r8
        ldr     r1, .L14+16
        add     r2, sp, #44
        bl      sscanf
        cmp     r0, #1
        beq     .L4
        ldr     r3, .L14+8
        ldr     r2, [r5]
        ldr     r0, [r3]
        ldr     r1, .L14+20
        mov     r3, r8
        b       .L12
.L4:
        mov     r0, r7
        ldr     r1, .L14+24
        bl      fopen
        subs    r4, r0, #0
        bne     .L5
        ldr     r3, .L14+8
        ldr     r2, [r5]
        ldr     r0, [r3]
        ldr     r1, .L14+28
        mov     r3, r7
        b       .L12
.L5:
        mov     r1, r4
        ldr     r0, .L14+32
        bl      fputs
        mov     r5, #0
        mov     r8, #1065353216
        ldr     r9, .L14+36
.L6:
        ldr     r10, [sp, #28]
        cmp     r10, r5
        bgt     .L7
        mov     r0, r4
        bl      fclose
        mov     r0, #0
        b       .L13
.L7:
        mov     r0, r5
        bl      __aeabi_i2d
        mov     r6, r0
        mov     r0, r10
        mov     r7, r1
        bl      __aeabi_i2d
        mov     r2, r0
        mov     r3, r1
        mov     r0, r6
        mov     r1, r7
        bl      __aeabi_ddiv
        mov     r2, #0
        mov     r3, #0
        bl      __aeabi_dadd
        bl      __aeabi_d2f
        mov     r6, r0
        mov     r3, r0
        add     r2, sp, #40
        add     r1, sp, #36
        add     r0, sp, #32
        str     r8, [sp, #4]    @ float
        str     r8, [sp]        @ float
        bl      dyeHSVtoRGB
        mov     r0, r6
        bl      __aeabi_f2d
        ldr     r10, [sp, #44]  @ float
        mov     r6, r0
        mov     r7, r1
        mov     r0, r10
        ldr     r1, [sp, #40]   @ float
        bl      __aeabi_fmul
        bl      __aeabi_f2d
        str     r0, [sp, #16]
        str     r1, [sp, #20]
        ldr     r1, [sp, #36]   @ float
        mov     r0, r10
        bl      __aeabi_fmul
        bl      __aeabi_f2d
        str     r0, [sp, #8]
        str     r1, [sp, #12]
        ldr     r1, [sp, #32]   @ float
        mov     r0, r10
        bl      __aeabi_fmul
        bl      __aeabi_f2d
        mov     r2, r6
        stm     sp, {r0-r1}
        mov     r3, r7
        mov     r1, r9
        mov     r0, r4
        bl      fprintf
        add     r5, r5, #1
        b       .L6

Code after r266385:
main:
        @ Function supports interworking.
        @ args = 0, pretend = 0, frame = 32
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {r4, r5, r6, r7, r8, r9, r10, fp, lr}  // 9 regs saved
        ldr     r3, [r1]
        ldr     r5, .L14
        cmp     r0, #4
        mov     r4, r1
        str     r3, [r5]
        sub     sp, sp, #60 // 60 bytes stack space
        blne    usage
.L2:
        ldmib   r4, {r6, r8}
        ldr     r1, .L14+4
        mov     r0, r6
        add     r2, sp, #36
        ldr     r7, [r4, #12]
        bl      sscanf
        cmp     r0, #1
        beq     .L3
        ldr     r3, .L14+8
        ldr     r0, [r3]
        mov     r3, r6
        ldr     r2, [r5]
        ldr     r1, .L14+12
.L12:
        ldr     r0, [r0, #8]
        bl      fprintf
        mov     r0, #1
.L13:
        bl      exit
.L3:
        mov     r0, r8
        ldr     r1, .L14+16
        add     r2, sp, #52
        bl      sscanf
        cmp     r0, #1
        beq     .L4
        ldr     r3, .L14+8
        ldr     r2, [r5]
        ldr     r0, [r3]
        ldr     r1, .L14+20
        mov     r3, r8
        b       .L12
.L4:
        mov     r0, r7
        ldr     r1, .L14+24
        bl      fopen
        subs    r4, r0, #0
        bne     .L5
        ldr     r3, .L14+8
        ldr     r2, [r5]
        ldr     r0, [r3]
        ldr     r1, .L14+28
        mov     r3, r7
        b       .L12
.L5:
        mov     r1, r4
        ldr     r0, .L14+32
        bl      fputs
        mov     r5, #0
        mov     r8, #0      // Additional register initialization
        mov     r9, #0      // Additional register initialization
        mov     r10, #1065353216
        ldr     fp, .L14+36
.L6:
        ldr     r3, [sp, #36]
        cmp     r3, r5
        bgt     .L7
        mov     r0, r4
        bl      fclose
        mov     r0, #0
        b       .L13
.L7:
        mov     r0, r5
        str     r3, [sp, #24]
        bl      __aeabi_i2d
        ldr     r3, [sp, #24]
        mov     r6, r0
        mov     r0, r3
        mov     r7, r1
        bl      __aeabi_i2d
        mov     r2, r0
        mov     r3, r1
        mov     r0, r6
        mov     r1, r7
        bl      __aeabi_ddiv
        mov     r2, r8   // r8, r9 are never changed and are always 0; just use an immediate value
        mov     r3, r9
        bl      __aeabi_dadd
        bl      __aeabi_d2f
        mov     r6, r0
        mov     r3, r0
        add     r2, sp, #48
        add     r1, sp, #44
        add     r0, sp, #40
        str     r10, [sp, #4]   @ float
        str     r10, [sp]       @ float
        bl      dyeHSVtoRGB
        mov     r0, r6
        bl      __aeabi_f2d
        mov     r2, r0
        mov     r3, r1
        ldr     r7, [sp, #52]   @ float
        ldr     r1, [sp, #48]   @ float
        mov     r0, r7
        str     r2, [sp, #24]
        str     r3, [sp, #28]
        bl      __aeabi_fmul
        bl      __aeabi_f2d
        str     r0, [sp, #16]
        str     r1, [sp, #20]
        ldr     r1, [sp, #44]   @ float
        mov     r0, r7
        bl      __aeabi_fmul
        bl      __aeabi_f2d
        str     r0, [sp, #8]
        str     r1, [sp, #12]
        ldr     r1, [sp, #40]   @ float
        mov     r0, r7
        bl      __aeabi_fmul
        bl      __aeabi_f2d
        add     r3, sp, #24
        ldmia   r3, {r2-r3}
        stm     sp, {r0-r1}
        mov     r1, fp
        mov     r0, r4
        bl      fprintf
        add     r5, r5, #1
        b       .L6

[Bug rtl-optimization/90255] [9 regression] r266385 caused code size regressions on Arm, thumb and thumb2

Reply via email to