The following fixes vectorization of loops containing CLOBBER statements by ignoring them during analysis and removing them during vectorization (they have to be removed because we ignored them for store/load placement and do not handle them in case we need to unroll the loop body).
Bootstrap / regtest pending on x86_64-unknown-linux-gnu. Do you see a better way of handling the CLOBBERs that is worthwhile? Thanks, Richard. 2013-05-28 Richard Biener <rguent...@suse.de> PR tree-optimization/56787 * tree-vect-data-refs.c (vect_analyze_data_refs): Drop clobbers from the list of data references. * tree-vect-loop.c (vect_determine_vectorization_factor): Skip clobbers. (vect_analyze_loop_operations): Likewise. (vect_transform_loop): Remove clobbers. * gcc.dg/vect/pr56787.c: New testcase. Index: gcc/tree-vect-data-refs.c =================================================================== *** gcc/tree-vect-data-refs.c (revision 199374) --- gcc/tree-vect-data-refs.c (working copy) *************** vect_analyze_data_refs (loop_vec_info lo *** 2861,2866 **** --- 2861,2867 ---- bool gather = false; int vf; + again: if (!dr || !DR_REF (dr)) { if (dump_enabled_p ()) *************** vect_analyze_data_refs (loop_vec_info lo *** 2872,2877 **** --- 2873,2891 ---- stmt = DR_STMT (dr); stmt_info = vinfo_for_stmt (stmt); + /* Discard clobbers from the dataref vector. We will remove + clobber stmts during vectorization. */ + if (gimple_clobber_p (stmt)) + { + if (i == datarefs.length () - 1) + { + datarefs.pop (); + break; + } + datarefs[i] = datarefs.pop (); + goto again; + } + /* Check that analysis of the data-ref succeeded. */ if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr) || !DR_STEP (dr)) Index: gcc/tree-vect-loop.c =================================================================== *** gcc/tree-vect-loop.c (revision 199374) --- gcc/tree-vect-loop.c (working copy) *************** vect_determine_vectorization_factor (loo *** 270,277 **** gcc_assert (stmt_info); /* Skip stmts which do not need to be vectorized. */ ! if (!STMT_VINFO_RELEVANT_P (stmt_info) ! 
&& !STMT_VINFO_LIVE_P (stmt_info)) { if (STMT_VINFO_IN_PATTERN_P (stmt_info) && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) --- 270,278 ---- gcc_assert (stmt_info); /* Skip stmts which do not need to be vectorized. */ ! if ((!STMT_VINFO_RELEVANT_P (stmt_info) ! && !STMT_VINFO_LIVE_P (stmt_info)) ! || gimple_clobber_p (stmt)) { if (STMT_VINFO_IN_PATTERN_P (stmt_info) && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) *************** vect_analyze_loop_operations (loop_vec_i *** 1431,1437 **** for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) { gimple stmt = gsi_stmt (si); ! if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL)) return false; } } /* bbs */ --- 1432,1439 ---- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) { gimple stmt = gsi_stmt (si); ! if (!gimple_clobber_p (stmt) ! && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL)) return false; } } /* bbs */ *************** vect_transform_loop (loop_vec_info loop_ *** 5595,5601 **** if (transform_pattern_stmt) stmt = pattern_stmt; else ! stmt = gsi_stmt (si); if (dump_enabled_p ()) { --- 5597,5613 ---- if (transform_pattern_stmt) stmt = pattern_stmt; else ! { ! stmt = gsi_stmt (si); ! /* During vectorization remove existing clobber stmts. */ ! if (gimple_clobber_p (stmt)) ! { ! unlink_stmt_vdef (stmt); ! gsi_remove (&si, true); ! release_defs (stmt); ! continue; ! } ! 
} if (dump_enabled_p ()) { Index: gcc/testsuite/gcc.dg/vect/pr56787.c =================================================================== *** gcc/testsuite/gcc.dg/vect/pr56787.c (revision 0) --- gcc/testsuite/gcc.dg/vect/pr56787.c (working copy) *************** *** 0 **** --- 1,35 ---- + /* { dg-do compile } */ + /* { dg-require-effective-target vect_float } */ + + inline void + bar (const float s[5], float z[3][5]) + { + float a = s[0], b = s[1], c = s[2], d = s[3], e = s[4]; + float f = 1.0f / a; + float u = f * b, v = f * c, w = f * d; + float p = 0.4f * (e - 0.5f * (b * u + c * v + d * w)); + z[0][3] = b * w; + z[1][3] = c * w; + z[2][3] = d * w + p; + } + + void + foo (unsigned long n, const float *__restrict u0, + const float *__restrict u1, const float *__restrict u2, + const float *__restrict u3, const float *__restrict u4, + const float *__restrict s0, const float *__restrict s1, + const float *__restrict s2, float *__restrict t3, + float *__restrict t4) + { + unsigned long i; + for (i = 0; i < n; i++) + { + float u[5], f[3][5]; + u[0] = u0[i]; u[1] = u1[i]; u[2] = u2[i]; u[3] = u3[i]; u[4] = u4[i]; + bar (u, f); + t3[i] = s0[i] * f[0][3] + s1[i] * f[1][3] + s2[i] * f[2][3]; + } + } + + /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */