------- Comment #1 from spop at gcc dot gnu dot org  2010-03-18 22:04 -------
Also note that a similar problem occurs for hadamard8:
gcc-4.5 -c hadamard8_diff.c -O3 -ffast-math -ftree-vectorizer-verbose=7 -msse2
[...]
hadamard8_diff.c:44: note: not vectorized: unhandled data-ref 
hadamard8_diff.c:26: note: not vectorized: data ref analysis failed D.2771_12 = *D.2770_11;

Here, too, we fail to analyze one of the data references.
Note that ICC 11.0 does vectorize this kernel.


// Local typedefs: the excerpt shown here includes no headers, so the
// fixed-width names are declared by hand.
typedef unsigned char uint8_t;
// NOTE(review): this is only 64 bits wide on ABIs where long is 64 bits
// (e.g. LP64). Not referenced by the code visible in this excerpt.
typedef unsigned long int uint64_t;
// Not referenced by the code visible in this excerpt; presumably a
// register-sized integer type -- TODO confirm against the original project.
typedef long int x86_reg;

/*
 * Absolute value of a.
 *
 * BUTTERFLYA below expands to FFABS, but this excerpt never defines it.
 * This is the conventional FFmpeg (libavutil) definition; the guard lets an
 * existing definition take precedence.  The argument is evaluated more than
 * once, so it must be free of side effects.
 */
#ifndef FFABS
#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
#endif

/*
 * Two-input butterfly into separate outputs:
 *     o1 = i1 + i2
 *     o2 = i1 - i2
 *
 * o1 and o2 must be lvalues.  i1 and i2 are each evaluated twice (after o1
 * has already been written), so they must be side-effect free and must not
 * alias o1.  Wrapped in do { } while (0) so the macro behaves as a single
 * statement, e.g. inside an unbraced if/else.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do {\
    (o1)= (i1)+(i2);\
    (o2)= (i1)-(i2);\
} while (0)

/*
 * In-place butterfly: (x, y) <- (x + y, x - y).
 *
 * x and y must be lvalues.  Both inputs are read into int temporaries before
 * either is overwritten.  The temporaries carry a macro-specific name so
 * that caller variables called `a` or `b` are not captured by the expansion.
 */
#define BUTTERFLY1(x,y) \
do {\
    int bfly_a_= (x);\
    int bfly_b_= (y);\
    (x)= bfly_a_+bfly_b_;\
    (y)= bfly_a_-bfly_b_;\
} while (0)

/*
 * Final butterfly stage folded into an absolute sum:
 *     FFABS(x + y) + FFABS(x - y)
 *
 * Pure expression; x and y are evaluated several times each, so they must
 * be free of side effects.
 */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

/*
 * Butterfly-transform the 8x8 difference between two pixel blocks and
 * return the accumulated FFABS() of the transformed values.
 *
 * The code is deliberately left in the exact form used by the bug report:
 * the src[stride*i+k] / dst[stride*i+k] loads are the data references under
 * discussion.
 *
 * s      - not referenced in the body.
 * dst    - read at dst[stride*i + k] for i, k in 0..7.
 * src    - read at src[stride*i + k] for i, k in 0..7.
 * stride - element distance between consecutive rows of dst and src.
 * h      - not referenced in the body; the row count is fixed at 8.
 *
 * Returns the sum, over all 64 outputs of the row and column butterfly
 * passes, of FFABS(output).
 * NOTE(review): FFABS is not defined in this excerpt; presumably it is an
 * absolute-value macro -- confirm against the original project.
 */
int hadamard8_diff8x8_c(void *s, uint8_t *dst, uint8_t *src, int stride, int h)
{
    int i;
    int temp[64];   // 8x8 work buffer, row i stored at temp[8*i .. 8*i+7]
    int sum=0;

    // Row pass: three butterfly stages along each of the 8 rows.
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        // Stage 1, fused with the subtraction: adjacent pairs of src-dst
        // differences. The uint8_t operands are promoted to int, so each
        // difference lies in [-255, 255].
        BUTTERFLY2(temp[8*i+0], temp[8*i+1],
src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3],
src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5],
src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7],
src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        // Stage 2: combine row elements that are 2 apart, in place.
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        // Stage 3: combine row elements that are 4 apart, in place.
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    // Column pass: i now selects a column; vertically adjacent elements are
    // 8 ints apart in temp.
    for(i=0; i<8; i++){
        // Stage 1: combine vertically adjacent rows (0/1, 2/3, 4/5, 6/7).
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        // Stage 2: combine rows that are 2 apart.
        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        // Stage 3 (rows 4 apart) is not written back to temp: BUTTERFLYA
        // yields FFABS(x+y) + FFABS(x-y) directly, which is accumulated.
        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43436

Reply via email to