------- Comment #1 from spop at gcc dot gnu dot org 2010-03-18 22:04 -------
Also note that a similar problem occurs for hadamard8:

gcc-4.5 -c hadamard8.c -O3 -ffast-math -ftree-vectorizer-verbose=7 -msse2
[...]
hadamard8_diff.c:44: note: not vectorized: unhandled data-ref
hadamard8_diff.c:26: note: not vectorized: data ref analysis failed D.2771_12 = *D.2770_11;
/*
 * For which we fail to analyze one of the data references as well.
 * Note that ICC 11.0 does vectorize this kernel.
 */

typedef unsigned char uint8_t;
typedef unsigned long int uint64_t;
typedef long int x86_reg;

/*
 * Absolute value of an int expression.  BUTTERFLYA uses FFABS but the
 * snippet as posted never defined it; the guard keeps this definition from
 * clashing with one supplied by the including translation unit.
 * The argument is evaluated more than once, so it must be side-effect free.
 */
#ifndef FFABS
#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
#endif

/*
 * Butterfly from two inputs into two distinct outputs:
 * o1 = i1 + i2, o2 = i1 - i2.
 * Expands to two statements, so it must only be used inside a braced block.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

/*
 * In-place butterfly on two lvalues: x becomes x + y, y becomes x - y
 * (both computed from the values held before the update).
 */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/*
 * Final butterfly stage fused with the absolute-value accumulation:
 * |x + y| + |x - y|.  Does not write back to x or y.
 */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

/*
 * Sum of the absolute values of the 8x8 butterfly (Hadamard-style) transform
 * of the difference block src - dst.
 *
 * s      - unused by this function.
 * dst    - first 8x8 block of bytes; row i starts at dst[stride*i].
 * src    - second 8x8 block of bytes; row i starts at src[stride*i].
 * stride - distance, in bytes, between the starts of consecutive rows.
 * h      - unused by this function; exactly 8 rows are always read.
 *
 * Returns the accumulated sum as an int.
 */
int hadamard8_diff8x8_c(void *s, uint8_t *dst, uint8_t *src, int stride, int h)
{
    int i;
    int temp[64];
    int sum=0;

    /* Row pass: three butterfly stages over each row of src - dst. */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /*
     * Column pass: two in-place butterfly stages per column; the third stage
     * is folded into BUTTERFLYA, which adds the absolute values straight
     * into sum instead of storing them back to temp.
     */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}

/* -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43436 */