Hi!

The following testcases are miscompiled on s390x-linux, because the
doloop_optimize
  /* Ensure that the new sequence doesn't clobber a register that
     is live at the end of the block.  */
  {
    bitmap modified = BITMAP_ALLOC (NULL);
  
    for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i))
      note_stores (i, record_reg_sets, modified);
      
    basic_block loop_end = desc->out_edge->src;
    bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified);
check doesn't work as intended.
The problem is that it uses df, but the df analysis was only done using
  iv_analysis_loop_init (loop);
->
  df_analyze_loop (loop);
which computes df inside on the bbs of the loop.
While loop_end bb is inside of the loop, df_get_live_out computed that
way includes registers set in the loop and used at the start of the next
iteration, but doesn't include registers set in the loop (or before the
loop) and used after the loop.

The following patch fixes that by doing whole function df_analyze first,
changes the loop iteration mode from 0 to LI_ONLY_INNERMOST (on many
targets which use can_use_doloop_if_innermost target hook a so are known
to only handle innermost loops) or LI_FROM_INNERMOST (I think only bfin
actually allows non-innermost loops) and checking not just
df_get_live_out (loop_end) (that is needed for something used by the
next iteration), but also df_get_live_in (desc->out_edge->dest),
i.e. what will be used after the loop.  df of such a bb shouldn't
be affected by the df_analyze_loop and so should be from df_analyze
of the whole function.

Bootstrapped/regtested on {x86_64,i686,s390x,powerpc64le,aarch64}-linux,
ok for trunk?

2024-12-05  Jakub Jelinek  <ja...@redhat.com>

        PR rtl-optimization/113994
        PR rtl-optimization/116799
        * loop-doloop.cc: Include targhooks.h.
        (doloop_optimize): Also punt on intersection of modified
        with df_get_live_in (desc->out_edge->dest).
        (doloop_optimize_loops): Call df_analyze.  Use
        LI_ONLY_INNERMOST or LI_FROM_INNERMOST instead of 0 as
        second loops_list argument.

        * gcc.c-torture/execute/pr116799.c: New test.
        * g++.dg/torture/pr113994.C: New test.

--- gcc/loop-doloop.cc.jj       2024-10-25 10:00:29.485767614 +0200
+++ gcc/loop-doloop.cc  2024-12-03 15:03:20.774349797 +0100
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.
 #include "loop-unroll.h"
 #include "regs.h"
 #include "df.h"
+#include "targhooks.h"
 
 /* This module is used to modify loops with a determinable number of
    iterations to use special low-overhead looping instructions.
@@ -800,6 +801,18 @@ doloop_optimize (class loop *loop)
 
     basic_block loop_end = desc->out_edge->src;
     bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified);
+    /* iv_analysis_loop_init calls df_analyze_loop, which computes just
+       partial df for blocks of the loop only.  The above will catch if
+       any of the modified registers are use inside of the loop body, but
+       it will most likely not have accurate info on registers used
+       at the destination of the out_edge.  We call df_analyze on the
+       whole function at the start of the pass though and iterate only
+       on innermost loops or from innermost loops, so
+       live in on desc->out_edge->dest should be still unmodified from
+       the initial df_analyze.  */
+    if (!fail)
+      fail = bitmap_intersect_p (df_get_live_in (desc->out_edge->dest),
+                                modified);
     BITMAP_FREE (modified);
 
     if (fail)
@@ -825,7 +838,12 @@ doloop_optimize_loops (void)
       df_live_set_all_dirty ();
     }
 
-  for (auto loop : loops_list (cfun, 0))
+  df_analyze ();
+
+  for (auto loop : loops_list (cfun,
+                              targetm.can_use_doloop_p
+                              == can_use_doloop_if_innermost
+                              ? LI_ONLY_INNERMOST : LI_FROM_INNERMOST))
     doloop_optimize (loop);
 
   if (optimize == 1)
--- gcc/testsuite/gcc.c-torture/execute/pr116799.c.jj   2024-12-03 
14:54:56.254293446 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr116799.c      2024-12-03 
14:57:18.929342570 +0100
@@ -0,0 +1,41 @@
+/* PR rtl-optimization/116799 */
+
+const char *l;
+
+__attribute__((noipa)) void
+foo (const char *x, const char *y, int z)
+{
+  if (x != l + 1 || y != x || z)
+    __builtin_abort ();
+}
+
+__attribute__((noipa)) void
+bar (const char *x, char *v)
+{
+  const char *w = x + __builtin_strlen (x);
+
+  while (x[0] == '*' && x < w - 1)
+    x++;
+
+  const char *y = w - 1;
+  int z = 1;
+  if (y >= x)
+    {
+      while (y - x > 0 && *y == '*')
+       y--;
+      z = 0;
+    }
+  int i = 0;
+  if (z)
+    v[i++] = 'a';
+  v[i] = 'b';
+  foo (x, y, z);
+}
+
+int
+main ()
+{
+  char v[2] = { 0 };
+  l = "**";
+  bar (l, v);
+}
--- gcc/testsuite/g++.dg/torture/pr113994.C.jj  2024-12-03 15:00:30.622701353 
+0100
+++ gcc/testsuite/g++.dg/torture/pr113994.C     2024-12-03 15:00:54.356373345 
+0100
@@ -0,0 +1,31 @@
+// PR rtl-optimization/113994
+// { dg-do run }
+
+#include <string>
+
+void
+foo (const std::string &x, size_t &y, std::string &z)
+{
+  size_t w = x.find (']');
+  if (w >= x.size ())
+    return;
+  else if (w == 1)
+    y = std::string::npos;
+  while (++w < x.size () && x[w] == u'.')
+    ;
+  z = x.substr (w);
+}
+
+__attribute__((noipa)) void
+bar ()
+{
+}
+
+int
+main ()
+{
+  size_t y = 0;
+  std::string z;
+  foo ("[0]", y, z);
+  bar ();
+}

        Jakub

Reply via email to