https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65413
Bug ID: 65413 Summary: inefficient code returning aggregates on powerpc64le Product: gcc Version: 5.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c Assignee: unassigned at gcc dot gnu.org Reporter: msebor at gcc dot gnu.org When returning aggregates that don't fit into a single register on powerpc64le, gcc emits the assembly below, which ultimately leaves the r3 and r4 registers holding the same values they started out with. All the shift and rotate instructions are unnecessary (and in fact, foo could be as simple as a blr). $ cat ~/tmp/x.c && gcc -O2 -S -Wall -o/dev/tty ~/tmp/x.c typedef struct { int a[3]; } A; A __attribute__ ((noinline)) foo (A a) { return a; } A bar (A a) { return foo (a); } .file "x.c" .machine power8 .abiversion 2 .section ".toc","aw" .section ".text" .align 2 .p2align 4,,15 .globl foo .type foo, @function foo: mr 9,3 li 3,0 rldicl 10,9,0,32 srdi 9,9,32 rldimi 3,10,0,32 rldicl 4,4,0,32 rldimi 3,9,32,0 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size foo,.-foo .align 2 .p2align 4,,15 .globl bar .type bar, @function bar: 0: addis 2,12,.TOC.-0b@ha addi 2,2,.TOC.-0b@l .localentry bar,.-bar mflr 0 std 0,16(1) stdu 1,-64(1) bl foo addi 1,1,64 ld 0,16(1) mr 9,3 li 3,0 rldicl 10,9,0,32 srdi 9,9,32 rldimi 3,10,0,32 rldicl 4,4,0,32 mtlr 0 rldimi 3,9,32,0 blr .long 0 .byte 0,0,0,1,128,0,0,0 .size bar,.-bar .ident "GCC: (GNU) 5.0.0 20150303 (experimental)" .section .note.GNU-stack,"",@progbits