------- Comment #1 from spop at gcc dot gnu dot org 2010-03-18 22:04 -------
Also note that a similar problem occurs for hadamard8:
gcc-4.5 -c hadamard8_diff.c -O3 -ffast-math -ftree-vectorizer-verbose=7 -msse2
[...]
hadamard8_diff.c:44: note: not vectorized: unhandled data-ref
hadamard8_diff.c:26: note: not vectorized: data ref analysis failed D.2771_12 = *D.2770_11;
Here, too, we fail to analyze one of the data references.
Note that ICC 11.0 does vectorize this kernel.
/* Local stand-ins for the fixed-width/register types, so the listing needs no headers. */
typedef unsigned char uint8_t;   /* byte-sized sample type used by the kernel */
typedef unsigned long uint64_t;  /* not referenced by the code in this listing */
typedef long          x86_reg;   /* not referenced by the code in this listing */
/*
 * Absolute value of a.  The argument is evaluated twice, so it must be free
 * of side effects.  Guarded so that a definition supplied elsewhere wins.
 */
#ifndef FFABS
#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
#endif

/*
 * Out-of-place butterfly: o1 = i1 + i2, o2 = i1 - i2.
 * i1 and i2 are each evaluated twice and o1 is written before o2 is
 * computed, so the inputs must not alias the outputs (use BUTTERFLY1 for the
 * in-place case).  Wrapped in do { } while (0) so the macro acts as a single
 * statement, e.g. as the body of an unbraced if/else.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do {\
    (o1) = (i1) + (i2);\
    (o2) = (i1) - (i2);\
} while (0)

/*
 * In-place butterfly: (x, y) becomes (x + y, x - y).
 * Both operands are first copied into int temporaries, so the arithmetic is
 * done in int.  x and y must be lvalues; each is evaluated twice.  The
 * temporaries carry a macro-specific suffix so that caller variables named
 * a or b are not captured.
 */
#define BUTTERFLY1(x,y) \
do {\
    int butterfly1_a_ = (x);\
    int butterfly1_b_ = (y);\
    (x) = butterfly1_a_ + butterfly1_b_;\
    (y) = butterfly1_a_ - butterfly1_b_;\
} while (0)

/*
 * |x + y| + |x - y|: a final butterfly stage folded into the absolute-value
 * sum.  x and y are each evaluated more than once.
 */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
/*
 * Butterfly-transform the 8x8 block of differences src - dst and return the
 * accumulated BUTTERFLYA terms of the result.
 *
 * The first loop reads 8 bytes from each of 8 rows of src and dst (rows are
 * `stride` elements apart), stores the pairwise sums/differences of the
 * per-pixel differences in temp[], and applies two more butterfly stages
 * along each row.  The second loop applies two butterfly stages down each
 * column and folds the last stage into the sum via BUTTERFLYA, which relies
 * on FFABS (not defined in this listing; presumably an absolute value).
 *
 * s and h are accepted but never used here.
 */
int hadamard8_diff8x8_c(void *s, uint8_t *dst, uint8_t *src, int stride, int h)
{
    int temp[64];
    int sum = 0;
    int row, col, k;

    (void)s;
    (void)h;

    /* Row pass. */
    for (row = 0; row < 8; row++) {
        /* FIXME (carried over from the original): try pointer walks */
        const uint8_t *src_row = src + stride * row;
        const uint8_t *dst_row = dst + stride * row;
        int *out = temp + 8 * row;

        /* Stage 1: sum/difference of horizontally adjacent pixel differences. */
        for (k = 0; k < 8; k += 2) {
            BUTTERFLY2(out[k], out[k + 1],
                       src_row[k] - dst_row[k],
                       src_row[k + 1] - dst_row[k + 1]);
        }

        /* Stage 2: pairs two apart -> (0,2) (1,3) (4,6) (5,7). */
        for (k = 0; k < 8; k += 4) {
            BUTTERFLY1(out[k],     out[k + 2]);
            BUTTERFLY1(out[k + 1], out[k + 3]);
        }

        /* Stage 3: pairs four apart -> (0,4) (1,5) (2,6) (3,7). */
        for (k = 0; k < 4; k++) {
            BUTTERFLY1(out[k], out[k + 4]);
        }
    }

    /* Column pass; consecutive rows of temp[] are 8 ints apart. */
    for (col = 0; col < 8; col++) {
        int *cell = temp + col;

        /* Stage 1: adjacent rows -> (0,1) (2,3) (4,5) (6,7). */
        for (k = 0; k < 8; k += 2) {
            BUTTERFLY1(cell[8 * k], cell[8 * (k + 1)]);
        }

        /* Stage 2: rows two apart -> (0,2) (1,3) (4,6) (5,7). */
        for (k = 0; k < 8; k += 4) {
            BUTTERFLY1(cell[8 * k],       cell[8 * (k + 2)]);
            BUTTERFLY1(cell[8 * (k + 1)], cell[8 * (k + 3)]);
        }

        /* Stage 3 (rows four apart) is folded into the accumulation. */
        for (k = 0; k < 4; k++) {
            sum += BUTTERFLYA(cell[8 * k], cell[8 * (k + 4)]);
        }
    }

    return sum;
}
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43436