On Thu, 5 Dec 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following testcases are miscompiled on s390x-linux, because the
> doloop_optimize
>   /* Ensure that the new sequence doesn't clobber a register that
>      is live at the end of the block.  */
>   {
>     bitmap modified = BITMAP_ALLOC (NULL);
>   
>     for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i))
>       note_stores (i, record_reg_sets, modified);
>       
>     basic_block loop_end = desc->out_edge->src;
>     bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified);
> check doesn't work as intended.
> The problem is that it uses df, but the df analysis was only done using
>   iv_analysis_loop_init (loop);
> ->
>   df_analyze_loop (loop);
> which computes df inside on the bbs of the loop.
> While loop_end bb is inside of the loop, df_get_live_out computed that
> way includes registers set in the loop and used at the start of the next
> iteration, but doesn't include registers set in the loop (or before the
> loop) and used after the loop.
> 
> The following patch fixes that by doing whole function df_analyze first,
> changes the loop iteration mode from 0 to LI_ONLY_INNERMOST (on many
> targets which use can_use_doloop_if_innermost target hook a so are known
> to only handle innermost loops) or LI_FROM_INNERMOST (I think only bfin
> actually allows non-innermost loops) and checking not just
> df_get_live_out (loop_end) (that is needed for something used by the
> next iteration), but also df_get_live_in (desc->out_edge->dest),
> i.e. what will be used after the loop.  df of such a bb shouldn't
> be affected by the df_analyze_loop and so should be from df_analyze
> of the whole function.
> 
> Bootstrapped/regtested on {x86_64,i686,s390x,powerpc64le,aarch64}-linux,
> ok for trunk?

Looks good to me.

Thanks,
Richard.

> 2024-12-05  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR rtl-optimization/113994
>       PR rtl-optimization/116799
>       * loop-doloop.cc: Include targhooks.h.
>       (doloop_optimize): Also punt on intersection of modified
>       with df_get_live_in (desc->out_edge->dest).
>       (doloop_optimize_loops): Call df_analyze.  Use
>       LI_ONLY_INNERMOST or LI_FROM_INNERMOST instead of 0 as
>       second loops_list argument.
> 
>       * gcc.c-torture/execute/pr116799.c: New test.
>       * g++.dg/torture/pr113994.C: New test.
> 
> --- gcc/loop-doloop.cc.jj     2024-10-25 10:00:29.485767614 +0200
> +++ gcc/loop-doloop.cc        2024-12-03 15:03:20.774349797 +0100
> @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.
>  #include "loop-unroll.h"
>  #include "regs.h"
>  #include "df.h"
> +#include "targhooks.h"
>  
>  /* This module is used to modify loops with a determinable number of
>     iterations to use special low-overhead looping instructions.
> @@ -800,6 +801,18 @@ doloop_optimize (class loop *loop)
>  
>      basic_block loop_end = desc->out_edge->src;
>      bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified);
> +    /* iv_analysis_loop_init calls df_analyze_loop, which computes just
> +       partial df for blocks of the loop only.  The above will catch if
> +       any of the modified registers are use inside of the loop body, but
> +       it will most likely not have accurate info on registers used
> +       at the destination of the out_edge.  We call df_analyze on the
> +       whole function at the start of the pass though and iterate only
> +       on innermost loops or from innermost loops, so
> +       live in on desc->out_edge->dest should be still unmodified from
> +       the initial df_analyze.  */
> +    if (!fail)
> +      fail = bitmap_intersect_p (df_get_live_in (desc->out_edge->dest),
> +                              modified);
>      BITMAP_FREE (modified);
>  
>      if (fail)
> @@ -825,7 +838,12 @@ doloop_optimize_loops (void)
>        df_live_set_all_dirty ();
>      }
>  
> -  for (auto loop : loops_list (cfun, 0))
> +  df_analyze ();
> +
> +  for (auto loop : loops_list (cfun,
> +                            targetm.can_use_doloop_p
> +                            == can_use_doloop_if_innermost
> +                            ? LI_ONLY_INNERMOST : LI_FROM_INNERMOST))
>      doloop_optimize (loop);
>  
>    if (optimize == 1)
> --- gcc/testsuite/gcc.c-torture/execute/pr116799.c.jj 2024-12-03 
> 14:54:56.254293446 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr116799.c    2024-12-03 
> 14:57:18.929342570 +0100
> @@ -0,0 +1,41 @@
> +/* PR rtl-optimization/116799 */
> +
> +const char *l;
> +
> +__attribute__((noipa)) void
> +foo (const char *x, const char *y, int z)
> +{
> +  if (x != l + 1 || y != x || z)
> +    __builtin_abort ();
> +}
> +
> +__attribute__((noipa)) void
> +bar (const char *x, char *v)
> +{
> +  const char *w = x + __builtin_strlen (x);
> +
> +  while (x[0] == '*' && x < w - 1)
> +    x++;
> +
> +  const char *y = w - 1;
> +  int z = 1;
> +  if (y >= x)
> +    {
> +      while (y - x > 0 && *y == '*')
> +     y--;
> +      z = 0;
> +    }
> +  int i = 0;
> +  if (z)
> +    v[i++] = 'a';
> +  v[i] = 'b';
> +  foo (x, y, z);
> +}
> +
> +int
> +main ()
> +{
> +  char v[2] = { 0 };
> +  l = "**";
> +  bar (l, v);
> +}
> --- gcc/testsuite/g++.dg/torture/pr113994.C.jj        2024-12-03 
> 15:00:30.622701353 +0100
> +++ gcc/testsuite/g++.dg/torture/pr113994.C   2024-12-03 15:00:54.356373345 
> +0100
> @@ -0,0 +1,31 @@
> +// PR rtl-optimization/113994
> +// { dg-do run }
> +
> +#include <string>
> +
> +void
> +foo (const std::string &x, size_t &y, std::string &z)
> +{
> +  size_t w = x.find (']');
> +  if (w >= x.size ())
> +    return;
> +  else if (w == 1)
> +    y = std::string::npos;
> +  while (++w < x.size () && x[w] == u'.')
> +    ;
> +  z = x.substr (w);
> +}
> +
> +__attribute__((noipa)) void
> +bar ()
> +{
> +}
> +
> +int
> +main ()
> +{
> +  size_t y = 0;
> +  std::string z;
> +  foo ("[0]", y, z);
> +  bar ();
> +}
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to