On Fri, Mar 04, 2016 at 11:42:34AM +0100, Uros Bizjak wrote:
> On Fri, Mar 4, 2016 at 11:34 AM, Jakub Jelinek <ja...@redhat.com> wrote:
> > On Fri, Mar 04, 2016 at 11:16:44AM +0100, Uros Bizjak wrote:
> >> On Fri, Mar 4, 2016 at 11:06 AM, Jakub Jelinek <ja...@redhat.com> wrote:
> >> > On Fri, Mar 04, 2016 at 10:59:48AM +0100, Uros Bizjak wrote:
> >> >> I don't like the fact that *dynamic_check is set to max (which is 0
> >> >> with your testcase) when recursion avoidance code already set it to
> >> >> "something reasonable", together with loop_1_byte alg. What do you
> >> >> think about attached (lightly tested) patch?
> >> >
> >> > But that can still set *dynamic_check to 0 if the recursive call has
> >> > not set *dynamic_check.
> >> > So, perhaps we want *dynamic_check = max ? max : 128;
> >> > ?
> >>
> >> I believe that recursive call set *dynamic_check to zero for a reason.
> >> The sent patch deals with recursion breaking issues only, leaving all
> >> other functionality as it was before. So, your issue is IMO orthogonal
> >> to the PR70062 and should be fixed (if at all) in a separate patch.
> >
> > The recursive call should never set *dynamic_check to 0, only to
> > -1 or 128 (the last one newly, since my fix).
> > And, my issue is IMO not orghogonal to that, either *dynamic_check == 0
> > is ok, or it is not.
> > Perhaps better would be to add an extra argument to decide_alg, false
> > for toplevel invocation, true for recursive invocation, and if recursive
> > call, just never try to recurse again (thus we could revert the previous
> > change) and don't set *dynamic_check to anything in that case during the
> > recursion.
> > And then at the caller side decide what we want for max == -1 and what
> > for max == 0 with *dynamic_check.
> 
> I think that would work, and considering that we have only one
> non-recursive callsite of decide_alg, it looks like the cleanest
> solution.

So like this?

2016-03-03  Jakub Jelinek  <ja...@redhat.com>

        PR target/70062
        * config/i386/i386.c (decide_alg): Add RECUR argument.  Revert
        2016-02-22 changes, instead don't recurse if RECUR is already true.
        Don't change *dynamic_check if RECUR.  Adjust recursive caller
        to pass true to the new argument.
        (ix86_expand_set_or_movmem): Adjust decide_alg caller.

        * gcc.target/i386/pr70062.c: New test.

--- gcc/config/i386/i386.c.jj   2016-03-03 21:49:38.535744904 +0100
+++ gcc/config/i386/i386.c      2016-03-04 12:06:40.075300426 +0100
@@ -26032,14 +26032,13 @@ static enum stringop_alg
 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
            unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
            bool memset, bool zero_memset, bool have_as,
-           int *dynamic_check, bool *noalign)
+           int *dynamic_check, bool *noalign, bool recur)
 {
   const struct stringop_algs *algs;
   bool optimize_for_speed;
   int max = 0;
   const struct processor_costs *cost;
   int i;
-  HOST_WIDE_INT orig_expected_size = expected_size;
   bool any_alg_usable_p = false;
 
   *noalign = false;
@@ -26157,19 +26156,18 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
       enum stringop_alg alg;
       HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
 
-      /* If there aren't any usable algorithms or if recursing with the
-        same arguments as before, then recursing on smaller sizes or
-        same size isn't going to find anything.  Just return the simple
-        byte-at-a-time copy loop.  */
-      if (!any_alg_usable_p || orig_expected_size == new_expected_size)
-        {
-          /* Pick something reasonable.  */
-          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
-            *dynamic_check = 128;
-          return loop_1_byte;
-        }
+      /* If there aren't any usable algorithms or if recursing already,
+        then recursing on smaller sizes or same size isn't going to
+        find anything.  Just return the simple byte-at-a-time copy loop.  */
+      if (!any_alg_usable_p || recur)
+       {
+         /* Pick something reasonable.  */
+         if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
+           *dynamic_check = 128;
+         return loop_1_byte;
+       }
       alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
-                       zero_memset, have_as, dynamic_check, noalign);
+                       zero_memset, have_as, dynamic_check, noalign, true);
       gcc_assert (*dynamic_check == -1);
       if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
@@ -26430,7 +26428,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx
   alg = decide_alg (count, expected_size, min_size, probable_max_size,
                    issetmem,
                    issetmem && val_exp == const0_rtx, have_as,
-                   &dynamic_check, &noalign);
+                   &dynamic_check, &noalign, false);
   if (alg == libcall)
     return false;
   gcc_assert (alg != no_stringop);
--- gcc/testsuite/gcc.target/i386/pr70062.c.jj  2016-03-04 12:03:55.240561744 
+0100
+++ gcc/testsuite/gcc.target/i386/pr70062.c     2016-03-04 12:03:55.240561744 
+0100
@@ -0,0 +1,11 @@
+/* PR target/70062 */
+/* { dg-options "-minline-all-stringops -minline-stringops-dynamically 
-mmemcpy-strategy=libcall:-1:noalign -Wno-psabi" } */
+/* { dg-additional-options "-mtune=k6-2" { target ia32 } } */
+
+typedef int V __attribute__ ((vector_size (32)));
+
+V
+foo (V x)
+{
+  return (V) { x[0] };
+}


        Jakub

Reply via email to