The following two kernels from FFmpeg are not vectorized by GCC 4.5 when compiled with:
gcc-4.5 -c diff_pixels.c -O3 -ffast-math -ftree-vectorizer-verbose=7 -msse2
[...]
diff_pixels.c:10: note: not vectorized: data ref analysis failed D.2726_9 = *s1_100;
Note that ICC 11.0 does vectorize these loop kernels.
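The difficulty seems to be that one of the data references goes through a
pointer that is incremented on every iteration by a function parameter
(stride in diff_pixels_c, line_size in pix_sum_c), i.e. by a value that is
not an integer constant known at compile time.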
typedef short DCTELEM;
typedef unsigned char uint8_t;

/*
 * Fill an 8x8 block with the element-wise difference s1 - s2.
 *
 * block   output, written contiguously: 8 rows of 8 DCTELEM values
 * s1, s2  input rows; consecutive rows start `stride` elements apart
 * stride  per-row advance applied to both s1 and s2
 */
void diff_pixels_c(DCTELEM *__restrict__ block, const uint8_t *s1,
                   const uint8_t *s2, int stride)
{
    int rows_left = 8;

    while (rows_left-- > 0) {
        int col;

        /* One output row: the subtraction is done in int, then stored
           into the DCTELEM slot. */
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];

        /* The inputs step by the runtime stride; the output is packed. */
        block += 8;
        s1 += stride;
        s2 += stride;
    }
}
/* Type aliases accompanying the second kernel.  Of these, only uint8_t is
   referenced by the code visible in this report; DCTELEM, x86_reg, uint32_t
   and uint64_t are not used by pix_sum_c (presumably left over from the
   original translation unit -- TODO confirm). */
typedef short DCTELEM;
typedef unsigned char uint8_t;
typedef long int x86_reg;
typedef unsigned int uint32_t;
/* NOTE(review): unsigned long is 64 bits wide only on LP64 targets; this
   alias presumably reflects the reporter's x86-64 build -- confirm. */
typedef unsigned long int uint64_t;
/*
 * Return the sum of all bytes in a 16x16 block.
 *
 * pix        first byte of the first row
 * line_size  distance between the starts of consecutive rows
 */
int
pix_sum_c (uint8_t * pix, int line_size)
{
    int total = 0;
    int rows_left = 16;

    while (rows_left-- > 0) {
        const uint8_t *row_end = pix + 16;

        /* Accumulate the 16 bytes of the current row. */
        while (pix != row_end)
            total += *pix++;

        /* pix already moved 16 bytes; skip the remainder of the line. */
        pix += line_size - 16;
    }

    return total;
}
--
Summary: Missed vectorization: "not vectorized: data ref analysis": pointer incremented by a parameter
Product: gcc
Version: 4.5.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: spop at gcc dot gnu dot org
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43434