https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69493
Bug ID: 69493
Summary: Poor code generation for return of struct containing vectors on PPC64LE
Product: gcc
Version: 6.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: wschmidt at gcc dot gnu.org
CC: dje at gcc dot gnu.org, munroesj at us dot ibm.com
Target Milestone: ---
Target: powerpc64le-*-linux*

For this simple test using the ELFv2 ABI, we generate code that correctly returns the structures in v2 and v3 (vs34 and vs35). However, there is an unnecessary store and reload of the vectors prior to the return, which will perform extremely poorly on POWER processors.

typedef struct
{
  __vector double vx0;
  __vector double vx1;
} vdoublex4_t;

vdoublex4_t
test_big_double (long double a, long double b)
{
  vdoublex4_t result;

  /* return a 0.0 */
  result.vx0[0] = __builtin_unpack_longdouble (a, 0);
  result.vx0[1] = __builtin_unpack_longdouble (a, 1);
  result.vx1[0] = __builtin_unpack_longdouble (b, 0);
  result.vx1[1] = __builtin_unpack_longdouble (b, 1);

  return (result);
}

$ gcc -S -O2 poor.c

poor.s:

	.file	"poor.c"
	.abiversion 2
	.section	".toc","aw"
	.section	".text"
	.align 2
	.p2align 4,,15
	.globl test_big_double
	.type	test_big_double, @function
test_big_double:
	fmr 10,1
	fmr 11,3
	addi 8,1,-96
	li 10,32
	xxlxor 12,12,12
	li 9,48
	xxlor 0,12,12
	xxpermdi 12,12,10,0
	xxpermdi 0,0,11,0
	xxpermdi 12,2,12,1
	xxpermdi 0,4,0,1
	xxpermdi 12,12,12,2
	xxpermdi 0,0,0,2
	stxvd2x 12,8,10
	stxvd2x 0,8,9
	lxvd2x 34,8,10
	lxvd2x 35,8,9
	xxpermdi 34,34,34,2
	xxpermdi 35,35,35,2
	blr
	.long 0
	.byte 0,0,0,0,0,0,0,0
	.size	test_big_double,.-test_big_double
	.ident	"GCC: (GNU) 6.0.0 20160125 (experimental) [trunk revision 232783]"
	.section	.note.GNU-stack,"",@progbits