https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66612

--- Comment #2 from amker at gcc dot gnu.org ---
Hi,
I had a look at the generated assembly.  The old code is as below:
        # GCC output for foo() from 20050830-1.c, PowerPC64 GAS syntax.
        # This version drives the loop with the count register (mtctr/bdnz)
        # and recomputes the store offset from the index on every iteration.
        .file   "20050830-1.c"
        .machine power4
        .section        ".toc","aw"
        .section        ".text"
        .section        ".toc","aw"
.LC0:
        .quad   a                       # TOC entry holding the address of symbol a
        .section        ".text"
        .align 2
        .p2align 4,,15
        .globl foo
        .section        ".opd","aw"
        .align 3
foo:
        # Function descriptor: code entry point, TOC base, and a zero third word.
        .quad   .L.foo,.TOC.@tocbase,0
        .previous
        .type   foo, @function
.L.foo:
        # r3 is presumably the incoming int argument (PPC64 ELF ABI) -- the
        # C source is not shown here, so confirm against 20050830-1.c.
        cmpwi 7,3,511                   # cr7 = signed 32-bit compare of r3 with 511
        ble 7,.L4                       # r3 <= 511: skip the loop entirely
        lis 9,0xffff                    # r9 = 0xffff0000 (sign-extended to 64 bits)
        addis 8,2,.LC0@toc@ha           # gpr load fusion, type long
        ld 8,.LC0@toc@l(8)              # r8 = address of a, loaded from TOC entry .LC0
        li 10,42                        # r10 = 42, the value the loop stores
        ori 9,9,0xff00                  # r9 low word = 0xffffff00
        rldicl 9,9,0,32                 # clear the upper 32 bits: r9 = 0x00000000ffffff00
        add 9,3,9                       # low word of r9 = r3 - 256
        cmpwi 7,9,256                   # cr7 = signed compare of that low word with 256
        addi 9,9,-256                   # low word of r9 = r3 - 512
        rldicl 9,9,56,40                # r9 = bits 8..31 of r9, i.e. (low word) >> 8
        addi 9,9,1                      # trip count = ((r3 - 512) >> 8) + 1
        mtctr 9                         # CTR = trip count
        blt- 7,.L9                      # hinted not-taken: low word of (r3 - 256) < 256
        .p2align 5,,31
.L3:
        sldi 9,3,2                      # r9 = r3 * 4, byte offset of the 4-byte element
        addi 3,3,-256                   # step the index down by 256
        extsw 3,3                       # keep the index a sign-extended 32-bit value
        stwx 10,8,9                     # the loop's store: word r10 (42) -> address r8 + r9
        bdnz .L3                        # decrement CTR, loop while CTR != 0
.L4:
        li 3,0                          # return value 0
        blr
.L9:
        # Out-of-line path: force a single iteration, then enter the loop.
        li 9,1
        mtctr 9
        b .L3
        # NOTE(review): the zero words below look like the traceback table GCC
        # emits after a function -- confirm.
        .long 0
        .byte 0,0,0,0,0,0,0,0
        .size   foo,.-.L.foo
        .ident  "GCC: (GNU) 6.0.0 20150602 (experimental)"


And now it is as below:
        # GCC output for foo() from 20050830-1.c, PowerPC64 GAS syntax.
        # This version keeps the store address itself in r3, steps it by
        # -1024 bytes per iteration, and exits by comparing it against a
        # precomputed end address in r9; the count register is not used.
        .file   "20050830-1.c"
        .machine power4
        .section        ".toc","aw"
        .section        ".text"
        .section        ".toc","aw"
.LC1:
        .quad   a                       # TOC entry holding the address of symbol a
        .section        ".text"
        .align 2
        .p2align 4,,15
        .globl foo
        .section        ".opd","aw"
        .align 3
foo:
        # Function descriptor: code entry point, TOC base, and a zero third word.
        .quad   .L.foo,.TOC.@tocbase,0
        .previous
        .type   foo, @function
.L.foo:
        # r3 is presumably the incoming int argument (PPC64 ELF ABI) -- the
        # C source is not shown here, so confirm against 20050830-1.c.
        cmpwi 7,3,511                   # cr7 = signed 32-bit compare of r3 with 511
        ble 7,.L4                       # r3 <= 511: skip the loop entirely
        addi 9,3,-512                   # r9 = r3 - 512
        addis 10,2,.LC1@toc@ha          # gpr load fusion, type long
        ld 10,.LC1@toc@l(10)            # r10 = address of a, loaded from TOC entry .LC1
        rlwinm 9,9,0,0,23               # r9 = (r3 - 512) & 0xffffff00 (low 8 bits cleared)
        subf 9,9,3                      # r9 = r3 - r9
        sldi 3,3,2                      # r3 = r3 * 4, byte offset of the first element stored
        sldi 9,9,2                      # r9 = r9 * 4, same scaling
        add 3,3,10                      # r3 = a + byte offset: address of the first store
        addi 9,9,-1024                  # move r9 one loop step (1024 bytes) further down
        add 9,9,10                      # r9 = end address: value of r3 at which the loop exits
        li 10,42                        # r10 = 42 (address of a is no longer needed in r10)
        .p2align 4,,15
.L3:
        stw 10,0(3)                     # the loop's store: word r10 (42) -> address in r3
        addi 3,3,-1024                  # step the address down by 1024 bytes (256 words)
        cmpld 7,3,9                     # cr7 = unsigned 64-bit compare of r3 with end address
        bne 7,.L3                       # loop until r3 reaches the end address
.L4:
        li 3,0                          # return value 0
        blr
        # NOTE(review): the zero words below look like the traceback table GCC
        # emits after a function -- confirm.
        .long 0
        .byte 0,0,0,0,0,0,0,0
        .size   foo,.-.L.foo
        .ident  "GCC: (GNU) 6.0.0 20150627 (experimental)"

The difference is because IVOPT chooses the array address IV and uses it to
eliminate the old comparison.  That's why the bdnz instruction isn't generated.

I am not good at ppc assembly code, but it seems to me the code is improved,
since there is one fewer instruction in the loop now.

BTW, which instruction in the old assembly's loop is the store instruction?

Thanks,
bin

Reply via email to