Compiling the following simple code sample with
-O3 for the Cell SPU produces very poorly performing
code:
#include <spu_intrinsics.h>
/* Aggregate of three `vector float` members (x, y, z).
 * add_tri() takes two of these by value and returns one by value. */
typedef struct _vec_tri {
vector float x;
vector float y;
vector float z;
} vec_tri;
/*
 * Member-wise sum of two vec_tri values.
 *
 * a, b    operands, both passed by value.
 * returns a vec_tri whose x, y and z members are spu_add() of the
 *         corresponding members of a and b.
 *
 * The function consists of three independent spu_add() calls and a
 * by-value return; it is the reproducer whose -O3 output is quoted in
 * this report, so the code is intentionally left exactly as submitted.
 */
vec_tri add_tri(vec_tri a, vec_tri b)
{
vec_tri c;
c.x = spu_add(a.x, b.x);
c.y = spu_add(a.y, b.y);
c.z = spu_add(a.z, b.z);
return (c);
}
The assembly:
# Verbatim compiler output for add_tri() as quoted in the report; only
# comments have been added. The three fa instructions are the actual
# work; every stqd/lqd of an argument or result below is a round trip
# through the stack frame.
.file "struct.c"
.text
.align 3
.global add_tri
.type add_tri, @function
add_tri:
# Branch hint naming .L3 and $lr. The .L3 label itself is not part of
# the listing shown in this report.
hbr .L3,$lr
# Store the current $sp at offset -224, then move $sp down by 224 bytes,
# so the saved value ends up at offset 0 of the new frame.
stqd $sp,-224($sp)
ai $sp,$sp,-224
# Spill incoming registers $3..$8 to frame offsets 80..160.
# NOTE(review): presumably $3-$5 hold a.x/a.y/a.z and $6-$8 hold
# b.x/b.y/b.z; this matches the fa pairings below - confirm against the
# SPU ABI.
stqd $3,80($sp)
stqd $4,96($sp)
stqd $6,128($sp)
stqd $7,144($sp)
stqd $8,160($sp)
stqd $5,112($sp)
# Reload the six values just stored, from the same frame offsets, into
# $5..$10.
lqd $5,112($sp)
lqd $10,128($sp)
lqd $9,80($sp)
lqd $8,144($sp)
lqd $7,96($sp)
lqd $6,160($sp)
# The three additions: slot 80 + slot 128, slot 96 + slot 144,
# slot 112 + slot 160 (that is, incoming $3+$6, $4+$7, $5+$8).
fa $3,$9,$10
fa $4,$7,$8
fa $2,$5,$6
# Store each sum to frame offsets 32/48/64 and load it straight back;
# the results end up in $3, $4 and $5.
# NOTE(review): presumably these are the return-value registers for
# vec_tri - confirm against the SPU ABI.
stqd $3,32($sp)
lqd $3,32($sp)
stqd $4,48($sp)
stqd $2,64($sp)
lqd $4,48($sp)
lqd $5,64($sp)
# Release the 224-byte frame. The quoted listing ends here, before the
# return branch.
ai $sp,$sp,224
--
Summary: gcc for CELL spu produces poor code using functions with
structure parameters
Product: gcc
Version: 4.4.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: eres at il dot ibm dot com
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35626