https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94949

--- Comment #4 from Richard Biener <rguenth at gcc dot gnu.org> ---
OK, so I guess with -Ofast (which enables -fallow-store-data-races!) we
cannot do the optimization of eliding the loads.

diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 18e5c18c17e..554dd4be5bb 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -2128,9 +2128,9 @@ execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)
   fmt_data.orig_loop = loop;
   for_each_index (&ref->mem.ref, force_move_till, &fmt_data);

+  bool always_stored = ref_always_accessed_p (loop, ref, true);
   if (bb_in_transaction (loop_preheader_edge (loop)->src)
-      || (! flag_store_data_races
-         && ! ref_always_accessed_p (loop, ref, true)))
+      || (! flag_store_data_races && ! always_stored))
     multi_threaded_model_p = true;

   if (multi_threaded_model_p)
@@ -2145,8 +2145,10 @@ execute_sm (class loop *loop, vec<edge> exits, im_mem_ref *ref)

   /* Avoid doing a load if there was no load of the ref in the loop.
      Esp. when the ref is not always stored we cannot optimize it
-     away later.  */
-  if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))
+     away later.  But when it is not always stored we must use a conditional
+     store then.  */
+  if ((!always_stored && !multi_threaded_model_p)
+      || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
     {
       load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
       lim_data = init_lim_data (load);

Reply via email to