https://gcc.gnu.org/g:8cb837ea238d974254459c1fe5ce2cf39251b452
commit r16-8495-g8cb837ea238d974254459c1fe5ce2cf39251b452
Author: Richard Biener <[email protected]>
Date:   Wed Apr 1 12:39:43 2026 +0200

    tree-optimization/124743 - SLP scheduling of invariant internal ops

    The following makes us avoid scheduling invariant internal operations
    to random points in the CFG when doing loop vectorization since in
    that case the stmt UIDs we use for dominance checks are not initialized
    outside of loop bodies.  Instead schedule such operations by appending
    to the loop preheader which is where invariants generally end up
    for loop vectorization.

            PR tree-optimization/124743
            * tree-vect-slp.cc (vect_schedule_slp_node): Schedule loop
            invariant operations in the loop preheader.

            * gcc.dg/vect/vect-pr124743.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-pr124743.c | 12 ++++++++
 gcc/tree-vect-slp.cc                      | 49 +++++++++++++++++++------------
 2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr124743.c b/gcc/testsuite/gcc.dg/vect/vect-pr124743.c
new file mode 100644
index 000000000000..542350aa8ef5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr124743.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fno-tree-loop-distribute-patterns -fno-tree-loop-distribution -fno-tree-ccp -fno-tree-copy-prop -fno-tree-dse" } */
+
+int a, b, c, d;
+int e(int f, int g) { return g < 0 || g > 1 ? 0 : f >> g; }
+int h(int i) { return a > 1 ? 0 : i << a; }
+int main()
+{
+  for (; b; b++)
+    c = e(h(1), d);
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 8fa6a740c96c..9d2d0f98ea85 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -12081,25 +12081,38 @@ vect_schedule_slp_node (vec_info *vinfo,
               si = gsi_for_stmt (last_stmt);
               gsi_next (&si);
 
-              /* Avoid scheduling internal defs outside of the loop when
-                 we might have only implicitly tracked loop mask/len defs.  */
               if (auto loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
-                if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
-                    || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
-                  {
-                    gimple_stmt_iterator si2
-                      = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
-                    if ((gsi_end_p (si2)
-                         && (LOOP_VINFO_LOOP (loop_vinfo)->header
-                             != gimple_bb (last_stmt))
-                         && dominated_by_p (CDI_DOMINATORS,
-                                            LOOP_VINFO_LOOP (loop_vinfo)->header,
-                                            gimple_bb (last_stmt)))
-                        || (!gsi_end_p (si2)
-                            && last_stmt != *si2
-                            && vect_stmt_dominates_stmt_p (last_stmt, *si2)))
-                      si = si2;
-                  }
+                {
+                  /* Avoid scheduling stmts to random places in the CFG, any
+                     stmt dominance check we performed is possibly wrong as UIDs
+                     are not initialized for all of the function for loop
+                     vectorization.  Instead append to the loop preheader.  */
+                  if ((LOOP_VINFO_LOOP (loop_vinfo)->header
+                       != gimple_bb (last_stmt))
+                      && dominated_by_p (CDI_DOMINATORS,
+                                         LOOP_VINFO_LOOP (loop_vinfo)->header,
+                                         gimple_bb (last_stmt)))
+                    si = gsi_end_bb (loop_preheader_edge
+                                       (LOOP_VINFO_LOOP (loop_vinfo))->src);
+                  /* Avoid scheduling internal defs outside of the loop when
+                     we might have only implicitly tracked loop mask/len defs.  */
+                  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+                      || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+                    {
+                      gimple_stmt_iterator si2
+                        = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
+                      if ((gsi_end_p (si2)
+                           && (LOOP_VINFO_LOOP (loop_vinfo)->header
+                               != gimple_bb (last_stmt))
+                           && dominated_by_p (CDI_DOMINATORS,
+                                              LOOP_VINFO_LOOP (loop_vinfo)->header,
+                                              gimple_bb (last_stmt)))
+                          || (!gsi_end_p (si2)
+                              && last_stmt != *si2
+                              && vect_stmt_dominates_stmt_p (last_stmt, *si2)))
+                        si = si2;
+                    }
+                }
             }
         }
 
