On Mon, 24 Jun 2019 at 14:59, Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes:
> > diff --git a/gcc/fwprop.c b/gcc/fwprop.c
> > index 45703fe5f01..93a1a10c9a6 100644
> > --- a/gcc/fwprop.c
> > +++ b/gcc/fwprop.c
> > @@ -547,6 +547,54 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, 
> > int flags)
> >         tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
> >                                    SUBREG_BYTE (x));
> >       }
> > +
> > +      else
> > +     {
> > +       rtvec vec;
> > +       rtvec newvec;
> > +       const char *fmt = GET_RTX_FORMAT (code);
> > +       rtx op;
> > +
> > +       for (int i = 0; fmt[i]; i++)
> > +         switch (fmt[i])
> > +           {
> > +           case 'E':
> > +             vec = XVEC (x, i);
> > +             newvec = vec;
> > +             for (int j = 0; j < GET_NUM_ELEM (vec); j++)
> > +               {
> > +                 op = RTVEC_ELT (vec, j);
> > +                 valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, 
> > flags);
> > +                 if (op != RTVEC_ELT (vec, j))
> > +                   {
> > +                     if (newvec == vec)
> > +                       {
> > +                         newvec = shallow_copy_rtvec (vec);
> > +                         if (!tem)
> > +                           tem = shallow_copy_rtx (x);
> > +                         XVEC (tem, i) = newvec;
> > +                       }
> > +                     RTVEC_ELT (newvec, j) = op;
> > +                   }
> > +               }
> > +           break;
>
> Misindented break: should be indented two columns further.
>
> > +
> > +           case 'e':
> > +             if (XEXP (x, i))
> > +               {
> > +                 op = XEXP (x, i);
> > +                 valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, 
> > flags);
> > +                 if (op != XEXP (x, i))
> > +                   {
> > +                     if (!tem)
> > +                       tem = shallow_copy_rtx (x);
> > +                     XEXP (tem, i) = op;
> > +                   }
> > +               }
> > +           break;
>
> Same here.
>
> > +           }
> > +     }
> > +
> >        break;
> >
> >      case RTX_OBJ:
> > @@ -1370,10 +1418,11 @@ forward_propagate_and_simplify (df_ref use, 
> > rtx_insn *def_insn, rtx def_set)
> >
> >  /* Given a use USE of an insn, if it has a single reaching
> >     definition, try to forward propagate it into that insn.
> > -   Return true if cfg cleanup will be needed.  */
> > +   Return true if cfg cleanup will be needed.
> > +   REG_PROP_ONLY is true if we should only propagate register copies.  */
> >
> >  static bool
> > -forward_propagate_into (df_ref use)
> > +forward_propagate_into (df_ref use, bool reg_prop_only = false)
> >  {
> >    df_ref def;
> >    rtx_insn *def_insn, *use_insn;
> > @@ -1394,10 +1443,6 @@ forward_propagate_into (df_ref use)
> >    if (DF_REF_IS_ARTIFICIAL (def))
> >      return false;
> >
> > -  /* Do not propagate loop invariant definitions inside the loop.  */
> > -  if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
> > -    return false;
> > -
> >    /* Check if the use is still present in the insn!  */
> >    use_insn = DF_REF_INSN (use);
> >    if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
> > @@ -1415,6 +1460,16 @@ forward_propagate_into (df_ref use)
> >    if (!def_set)
> >      return false;
> >
> > +  if (reg_prop_only && !REG_P (SET_SRC (def_set)))
> > +    return false;
> > +
> > +  /* Allow propagating def inside loop only if source of def_set is
> > +     reg, since replacing reg by source reg shouldn't increase cost.  */
>
> Maybe:
>
>   /* Allow propagations into a loop only for reg-to-reg copies, since
>      replacing one register by another shouldn't increase the cost.  */
>
> > +
> > +  if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father
> > +      && !REG_P (SET_SRC (def_set)))
> > +    return false;
>
> To be extra safe, I think we should check that the SET_DEST is a REG_P
> in both cases, to exclude REG-to-SUBREG copies.
Thanks for the suggestions.
Does the attached version look OK ?

Thanks,
Prathamesh
>
> Thanks,
> Richard
diff --git a/gcc/fwprop.c b/gcc/fwprop.c
index 45703fe5f01..c5abebb7832 100644
--- a/gcc/fwprop.c
+++ b/gcc/fwprop.c
@@ -547,6 +547,54 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
 	  tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
 				     SUBREG_BYTE (x));
 	}
+
+      else
+	{
+	  rtvec vec;
+	  rtvec newvec;
+	  const char *fmt = GET_RTX_FORMAT (code);
+	  rtx op;
+
+	  for (int i = 0; fmt[i]; i++)
+	    switch (fmt[i])
+	      {
+	      case 'E':
+		vec = XVEC (x, i);
+		newvec = vec;
+		for (int j = 0; j < GET_NUM_ELEM (vec); j++)
+		  {
+		    op = RTVEC_ELT (vec, j);
+		    valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags);
+		    if (op != RTVEC_ELT (vec, j))
+		      {
+			if (newvec == vec)
+			  {
+			    newvec = shallow_copy_rtvec (vec);
+			    if (!tem)
+			      tem = shallow_copy_rtx (x);
+			    XVEC (tem, i) = newvec;
+			  }
+			RTVEC_ELT (newvec, j) = op;
+		      }
+		  }
+	        break;
+
+	      case 'e':
+		if (XEXP (x, i))
+		  {
+		    op = XEXP (x, i);
+		    valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags);
+		    if (op != XEXP (x, i))
+		      {
+			if (!tem)
+			  tem = shallow_copy_rtx (x);
+			XEXP (tem, i) = op;
+		      }
+		  }
+	        break;
+	      }
+	}
+
       break;
 
     case RTX_OBJ:
@@ -1370,10 +1418,11 @@ forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set)
 
 /* Given a use USE of an insn, if it has a single reaching
    definition, try to forward propagate it into that insn.
-   Return true if cfg cleanup will be needed.  */
+   Return true if cfg cleanup will be needed.
+   REG_PROP_ONLY is true if we should only propagate register copies.  */
 
 static bool
-forward_propagate_into (df_ref use)
+forward_propagate_into (df_ref use, bool reg_prop_only = false)
 {
   df_ref def;
   rtx_insn *def_insn, *use_insn;
@@ -1394,10 +1443,6 @@ forward_propagate_into (df_ref use)
   if (DF_REF_IS_ARTIFICIAL (def))
     return false;
 
-  /* Do not propagate loop invariant definitions inside the loop.  */
-  if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
-    return false;
-
   /* Check if the use is still present in the insn!  */
   use_insn = DF_REF_INSN (use);
   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
@@ -1415,6 +1460,19 @@ forward_propagate_into (df_ref use)
   if (!def_set)
     return false;
 
+  if (reg_prop_only
+      && !REG_P (SET_SRC (def_set))
+      && !REG_P (SET_DEST (def_set)))
+    return false;
+
+  /* Allow propagations into a loop only for reg-to-reg copies, since
+     replacing one register by another shouldn't increase the cost.  */
+
+  if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father
+      && !REG_P (SET_SRC (def_set))
+      && !REG_P (SET_DEST (def_set)))
+    return false;
+
   /* Only try one kind of propagation.  If two are possible, we'll
      do it on the following iterations.  */
   if (forward_propagate_and_simplify (use, def_insn, def_set)
@@ -1483,7 +1541,7 @@ gate_fwprop (void)
 }
 
 static unsigned int
-fwprop (void)
+fwprop (bool fwprop_addr_p)
 {
   unsigned i;
 
@@ -1502,11 +1560,16 @@ fwprop (void)
 
       df_ref use = DF_USES_GET (i);
       if (use)
-	if (DF_REF_TYPE (use) == DF_REF_REG_USE
-	    || DF_REF_BB (use)->loop_father == NULL
-	    /* The outer most loop is not really a loop.  */
-	    || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
-	  forward_propagate_into (use);
+	{
+	  if (DF_REF_TYPE (use) == DF_REF_REG_USE
+	      || DF_REF_BB (use)->loop_father == NULL
+	      /* The outer most loop is not really a loop.  */
+	      || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
+	    forward_propagate_into (use, fwprop_addr_p);
+
+	  else if (fwprop_addr_p)
+	    forward_propagate_into (use, false);
+	}
     }
 
   fwprop_done ();
@@ -1537,7 +1600,7 @@ public:
 
   /* opt_pass methods: */
   virtual bool gate (function *) { return gate_fwprop (); }
-  virtual unsigned int execute (function *) { return fwprop (); }
+  virtual unsigned int execute (function *) { return fwprop (false); }
 
 }; // class pass_rtl_fwprop
 
@@ -1549,33 +1612,6 @@ make_pass_rtl_fwprop (gcc::context *ctxt)
   return new pass_rtl_fwprop (ctxt);
 }
 
-static unsigned int
-fwprop_addr (void)
-{
-  unsigned i;
-
-  fwprop_init ();
-
-  /* Go through all the uses.  df_uses_create will create new ones at the
-     end, and we'll go through them as well.  */
-  for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
-    {
-      if (!propagations_left)
-	break;
-
-      df_ref use = DF_USES_GET (i);
-      if (use)
-	if (DF_REF_TYPE (use) != DF_REF_REG_USE
-	    && DF_REF_BB (use)->loop_father != NULL
-	    /* The outer most loop is not really a loop.  */
-	    && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
-	  forward_propagate_into (use);
-    }
-
-  fwprop_done ();
-  return 0;
-}
-
 namespace {
 
 const pass_data pass_data_rtl_fwprop_addr =
@@ -1600,7 +1636,7 @@ public:
 
   /* opt_pass methods: */
   virtual bool gate (function *) { return gate_fwprop (); }
-  virtual unsigned int execute (function *) { return fwprop_addr (); }
+  virtual unsigned int execute (function *) { return fwprop (true); }
 
 }; // class pass_rtl_fwprop_addr
 
diff --git a/gcc/testsuite/gfortran.dg/pr88833.f90 b/gcc/testsuite/gfortran.dg/pr88833.f90
new file mode 100644
index 00000000000..224e6ce5f3d
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr88833.f90
@@ -0,0 +1,9 @@
+! { dg-do assemble { target aarch64_asm_sve_ok } }
+! { dg-options "-O3 -march=armv8.2-a+sve --save-temps" }
+
+subroutine foo(x)
+  real :: x(100)
+  x = x + 10
+end subroutine foo
+
+! { dg-final { scan-assembler {\twhilelo\tp[0-9]+\.s, wzr, (w[0-9]+).*\twhilelo\tp[0-9]+\.s, w[0-9]+, \1} } }

Reply via email to