On Fri, Mar 4, 2016 at 12:11 PM, Jakub Jelinek <ja...@redhat.com> wrote:
> On Fri, Mar 04, 2016 at 11:42:34AM +0100, Uros Bizjak wrote:
>> On Fri, Mar 4, 2016 at 11:34 AM, Jakub Jelinek <ja...@redhat.com> wrote:
>> > On Fri, Mar 04, 2016 at 11:16:44AM +0100, Uros Bizjak wrote:
>> >> On Fri, Mar 4, 2016 at 11:06 AM, Jakub Jelinek <ja...@redhat.com> wrote:
>> >> > On Fri, Mar 04, 2016 at 10:59:48AM +0100, Uros Bizjak wrote:
>> >> >> I don't like the fact that *dynamic_check is set to max (which is 0
>> >> >> with your testcase) when recursion avoidance code already set it to
>> >> >> "something reasonable", together with loop_1_byte alg. What do you
>> >> >> think about attached (lightly tested) patch?
>> >> >
>> >> > But that can still set *dynamic_check to 0 if the recursive call has
>> >> > not set *dynamic_check.
>> >> > So, perhaps we want *dynamic_check = max ? max : 128;
>> >> > ?
>> >>
>> >> I believe that recursive call set *dynamic_check to zero for a reason.
>> >> The sent patch deals with recursion breaking issues only, leaving all
>> >> other functionality as it was before. So, your issue is IMO orthogonal
>> >> to the PR70062 and should be fixed (if at all) in a separate patch.
>> >
>> > The recursive call should never set *dynamic_check to 0, only to
>> > -1 or 128 (the last one newly, since my fix).
>> > And, my issue is IMO not orghogonal to that, either *dynamic_check == 0
>> > is ok, or it is not.
>> > Perhaps better would be to add an extra argument to decide_alg, false
>> > for toplevel invocation, true for recursive invocation, and if recursive
>> > call, just never try to recurse again (thus we could revert the previous
>> > change) and don't set *dynamic_check to anything in that case during the
>> > recursion.
>> > And then at the caller side decide what we want for max == -1 and what
>> > for max == 0 with *dynamic_check.
>>
>> I think that would work, and considering that we have only one
>> non-recursive callsite of decide_alg, it looks like the cleanest
>> solution.
>
> So like this?

Yes, this looks like much better and cleaner solution.

> 2016-03-03  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/70062
>         * config/i386/i386.c (decide_alg): Add RECUR argument.  Revert
>         2016-02-22 changes, instead don't recurse if RECUR is already true.
>         Don't change *dynamic_check if RECUR.  Adjust recursive caller
>         to pass true to the new argument.
>         (ix86_expand_set_or_movmem): Adjust decide_alg caller.
>
>         * gcc.target/i386/pr70062.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/i386.c.jj   2016-03-03 21:49:38.535744904 +0100
> +++ gcc/config/i386/i386.c      2016-03-04 12:06:40.075300426 +0100
> @@ -26032,14 +26032,13 @@ static enum stringop_alg
>  decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
>             unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
>             bool memset, bool zero_memset, bool have_as,
> -           int *dynamic_check, bool *noalign)
> +           int *dynamic_check, bool *noalign, bool recur)
>  {
>    const struct stringop_algs *algs;
>    bool optimize_for_speed;
>    int max = 0;
>    const struct processor_costs *cost;
>    int i;
> -  HOST_WIDE_INT orig_expected_size = expected_size;
>    bool any_alg_usable_p = false;
>
>    *noalign = false;
> @@ -26157,19 +26156,18 @@ decide_alg (HOST_WIDE_INT count, HOST_WI
>        enum stringop_alg alg;
>        HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
>
> -      /* If there aren't any usable algorithms or if recursing with the
> -        same arguments as before, then recursing on smaller sizes or
> -        same size isn't going to find anything.  Just return the simple
> -        byte-at-a-time copy loop.  */
> -      if (!any_alg_usable_p || orig_expected_size == new_expected_size)
> -        {
> -          /* Pick something reasonable.  */
> -          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
> -            *dynamic_check = 128;
> -          return loop_1_byte;
> -        }
> +      /* If there aren't any usable algorithms or if recursing already,
> +        then recursing on smaller sizes or same size isn't going to
> +        find anything.  Just return the simple byte-at-a-time copy loop.  */
> +      if (!any_alg_usable_p || recur)
> +       {
> +         /* Pick something reasonable.  */
> +         if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
> +           *dynamic_check = 128;
> +         return loop_1_byte;
> +       }
>        alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
> -                       zero_memset, have_as, dynamic_check, noalign);
> +                       zero_memset, have_as, dynamic_check, noalign, true);
>        gcc_assert (*dynamic_check == -1);
>        if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
>         *dynamic_check = max;
> @@ -26430,7 +26428,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx
>    alg = decide_alg (count, expected_size, min_size, probable_max_size,
>                     issetmem,
>                     issetmem && val_exp == const0_rtx, have_as,
> -                   &dynamic_check, &noalign);
> +                   &dynamic_check, &noalign, false);
>    if (alg == libcall)
>      return false;
>    gcc_assert (alg != no_stringop);
> --- gcc/testsuite/gcc.target/i386/pr70062.c.jj  2016-03-04 12:03:55.240561744 
> +0100
> +++ gcc/testsuite/gcc.target/i386/pr70062.c     2016-03-04 12:03:55.240561744 
> +0100
> @@ -0,0 +1,11 @@
> +/* PR target/70062 */
> +/* { dg-options "-minline-all-stringops -minline-stringops-dynamically 
> -mmemcpy-strategy=libcall:-1:noalign -Wno-psabi" } */
> +/* { dg-additional-options "-mtune=k6-2" { target ia32 } } */
> +
> +typedef int V __attribute__ ((vector_size (32)));
> +
> +V
> +foo (V x)
> +{
> +  return (V) { x[0] };
> +}
>
>
>         Jakub

Reply via email to