Vectorizer fails to handle this:
------------------------------------------------------------
#define NUMPOINTS 50000
#define align(x) __attribute__((align(x)))

typedef float align(16) MATRIX[3][3];

static float points[NUMPOINTS][4];
static align(16) float opoints[NUMPOINTS][4];
static bool flags[NUMPOINTS];
static MATRIX gmatrix;

void RotateVectors (void)
{
  int i, r;

  for (r = 0; r < 4; r++)
  {
    for (i = 0; i < NUMPOINTS; i++)
    {
      opoints[i][0] = gmatrix[0][0] * points[i][0]
                    + gmatrix[0][1] * points[i][1]
                    + gmatrix[0][2] * points[i][2];
      opoints[i][1] = gmatrix[1][0] * points[i][0]
                    + gmatrix[1][1] * points[i][1]
                    + gmatrix[1][2] * points[i][2];
      opoints[i][2] = gmatrix[2][0] * points[i][0]
                    + gmatrix[2][1] * points[i][1]
                    + gmatrix[2][2] * points[i][2];
      flags[i] = true;
    }
  }
}
------------------------------------------------------------

loop at bench.cc:52: not vectorized: complicated access pattern.
loop at bench.cc:52: bad data access.

--
           Summary: vectorizer failed for vector matrix multiplication
           Product: gcc
           Version: 4.0.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: enhancement
          Priority: P2
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: giovannibajo at libero dot it
                CC: gcc-bugs at gcc dot gnu dot org

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18438