Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes:
> On Tue, 17 Oct 2023 at 02:40, Richard Sandiford
> <richard.sandif...@arm.com> wrote:
>> Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes:
>> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
>> > index 4f8561509ff..55a6a68c16c 100644
>> > --- a/gcc/fold-const.cc
>> > +++ b/gcc/fold-const.cc
>> > @@ -10684,9 +10684,8 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, 
>> > tree arg1,
>> >
>> >        /* Ensure that the stepped sequence always selects from the same
>> >        input pattern.  */
>> > -      unsigned arg_npatterns
>> > -     = ((q1 & 1) == 0) ? VECTOR_CST_NPATTERNS (arg0)
>> > -                       : VECTOR_CST_NPATTERNS (arg1);
>> > +      tree arg = ((q1 & 1) == 0) ? arg0 : arg1;
>> > +      unsigned arg_npatterns = VECTOR_CST_NPATTERNS (arg);
>> >
>> >        if (!multiple_p (step, arg_npatterns))
>> >       {
>> > @@ -10694,6 +10693,29 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, 
>> > tree arg1,
>> >           *reason = "step is not multiple of npatterns";
>> >         return false;
>> >       }
>> > +
>> > +      /* If a1 chooses base element from arg, ensure that it's a natural
>> > +      stepped sequence, ie, (arg[2] - arg[1]) == (arg[1] - arg[0])
>> > +      to preserve arg's encoding.  */
>> > +
>> > +      unsigned HOST_WIDE_INT index;
>> > +      if (!r1.is_constant (&index))
>> > +     return false;
>> > +      if (index < arg_npatterns)
>> > +     {
>>
>> I don't know whether it matters in practice, but I think the two conditions
>> above are more natural as:
>>
>>     if (maybe_lt (r1, arg_npatterns))
>>       {
>>         unsigned HOST_WIDE_INT index;
>>         if (!r1.is_constant (&index))
>>           return false;
>>
>>         ...[code below]...
>>       }
>>
>> > +       tree arg_elem0 = vector_cst_elt (arg, index);
>> > +       tree arg_elem1 = vector_cst_elt (arg, index + arg_npatterns);
>> > +       tree arg_elem2 = vector_cst_elt (arg, index + arg_npatterns * 2);
>> > +
>> > +       if (!operand_equal_p (const_binop (MINUS_EXPR, arg_elem2, 
>> > arg_elem1),
>> > +                             const_binop (MINUS_EXPR, arg_elem1, 
>> > arg_elem0),
>> > +                             0))
>>
>> This needs to check whether const_binop returns null.  Maybe:
>>
>>    tree step1, step2;
>>    if (!(step1 = const_binop (MINUS_EXPR, arg_elem1, arg_elem0))
>>        || !(step2 = const_binop (MINUS_EXPR, arg_elem2, arg_elem1))
>>        || !operand_equal_p (step1, step2, 0))
>>
>> OK with those changes, thanks.
> Hi Richard,
> Thanks for the suggestions, updated the attached patch accordingly.
> Bootstrapped+tested with and without SVE on aarch64-linux-gnu and
> x86_64-linux-gnu.
> OK to commit ?

Yes, thanks.

Richard

>
> Thanks,
> Prathamesh
>>
>> Richard
>>
>> > +         {
>> > +           if (reason)
>> > +             *reason = "not a natural stepped sequence";
>> > +           return false;
>> > +         }
>> > +     }
>> >      }
>> >
>> >    return true;
>> > @@ -17161,7 +17183,8 @@ namespace test_fold_vec_perm_cst {
>> >  static tree
>> >  build_vec_cst_rand (machine_mode vmode, unsigned npatterns,
>> >                   unsigned nelts_per_pattern,
>> > -                 int step = 0, int threshold = 100)
>> > +                 int step = 0, bool natural_stepped = false,
>> > +                 int threshold = 100)
>> >  {
>> >    tree inner_type = lang_hooks.types.type_for_mode (GET_MODE_INNER 
>> > (vmode), 1);
>> >    tree vectype = build_vector_type_for_mode (inner_type, vmode);
>> > @@ -17176,17 +17199,28 @@ build_vec_cst_rand (machine_mode vmode, unsigned 
>> > npatterns,
>> >
>> >    // Fill a1 for each pattern
>> >    for (unsigned i = 0; i < npatterns; i++)
>> > -    builder.quick_push (build_int_cst (inner_type, rand () % threshold));
>> > -
>> > +    {
>> > +      tree a1;
>> > +      if (natural_stepped)
>> > +     {
>> > +       tree a0 = builder[i];
>> > +       wide_int a0_val = wi::to_wide (a0);
>> > +       wide_int a1_val = a0_val + step;
>> > +       a1 = wide_int_to_tree (inner_type, a1_val);
>> > +     }
>> > +      else
>> > +     a1 = build_int_cst (inner_type, rand () % threshold);
>> > +      builder.quick_push (a1);
>> > +    }
>> >    if (nelts_per_pattern == 2)
>> >      return builder.build ();
>> >
>> >    for (unsigned i = npatterns * 2; i < npatterns * nelts_per_pattern; i++)
>> >      {
>> >        tree prev_elem = builder[i - npatterns];
>> > -      int prev_elem_val = TREE_INT_CST_LOW (prev_elem);
>> > -      int val = prev_elem_val + step;
>> > -      builder.quick_push (build_int_cst (inner_type, val));
>> > +      wide_int prev_elem_val = wi::to_wide (prev_elem);
>> > +      wide_int val = prev_elem_val + step;
>> > +      builder.quick_push (wide_int_to_tree (inner_type, val));
>> >      }
>> >
>> >    return builder.build ();
>> > @@ -17432,7 +17466,7 @@ test_nunits_min_2 (machine_mode vmode)
>> >        and step (a2 - a1) = 1, step is not a multiple of npatterns
>> >        in input vector. So return NULL_TREE.  */
>> >        {
>> > -     tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1);
>> > +     tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1, true);
>> >       tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1);
>> >       poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>> >
>> > @@ -17452,7 +17486,7 @@ test_nunits_min_2 (machine_mode vmode)
>> >        Test that stepped sequence of the pattern selects from arg0.
>> >        res = { arg1[0], arg0[0], arg0[1], ... } // (1, 3)  */
>> >        {
>> > -     tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
>> > +     tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1, true);
>> >       tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
>> >       poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>> >
>> > @@ -17466,6 +17500,62 @@ test_nunits_min_2 (machine_mode vmode)
>> >       tree expected_res[] = { ARG1(0), ARG0(0), ARG0(1) };
>> >       validate_res (1, 3, res, expected_res);
>> >        }
>> > +
>> > +      /* Case 6: PR111648 - a1 chooses base element from input vector arg.
>> > +      In this case ensure that arg has a natural stepped sequence
>> > +      to preserve arg's encoding.
>> > +
>> > +      As a concrete example, consider:
>> > +      arg0: { -16, -9, -10, ... } // (1, 3)
>> > +      arg1: { -12, -5, -6, ... }  // (1, 3)
>> > +      sel = { 0, len, len + 1, ... } // (1, 3)
>> > +
>> > +      This will create res with following encoding:
>> > +      res = { arg0[0], arg1[0], arg1[1], ... } // (1, 3)
>> > +          = { -16, -12, -5, ... }
>> > +
>> > +      The step in above encoding would be: (-5) - (-12) = 7
>> > +      And hence res[3] would be computed as -5 + 7 = 2.
>> > +      instead of arg1[2], ie, -6.
>> > +      Ensure that valid_mask_for_fold_vec_perm_cst returns false
>> > +      for this case.  */
>> > +      {
>> > +     tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
>> > +     tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
>> > +     poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>> > +
>> > +     vec_perm_builder builder (len, 1, 3);
>> > +     poly_uint64 mask_elems[] = { 0, len, len+1 };
>> > +     builder_push_elems (builder, mask_elems);
>> > +
>> > +     vec_perm_indices sel (builder, 2, len);
>> > +     const char *reason;
>> > +     /* FIXME: It may happen that build_vec_cst_rand may build a natural
>> > +        stepped pattern, even if we didn't explicitly tell it to. So 
>> > folding
>> > +        may not always fail, but if it does, ensure that's because arg1 
>> > does
>> > +        not have a natural stepped sequence (and not due to other reason) 
>> >  */
>> > +     tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, 
>> > &reason);
>> > +     if (res == NULL_TREE)
>> > +       ASSERT_TRUE (!strcmp (reason, "not a natural stepped sequence"));
>> > +      }
>> > +
>> > +      /* Case 7: Same as Case 6, except that arg1 contains natural stepped
>> > +      sequence and thus folding should be valid for this case.  */
>> > +      {
>> > +     tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
>> > +     tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1, true);
>> > +     poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>> > +
>> > +     vec_perm_builder builder (len, 1, 3);
>> > +     poly_uint64 mask_elems[] = { 0, len, len+1 };
>> > +     builder_push_elems (builder, mask_elems);
>> > +
>> > +     vec_perm_indices sel (builder, 2, len);
>> > +     tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
>> > +
>> > +     tree expected_res[] = { ARG0(0), ARG1(0), ARG1(1) };
>> > +     validate_res (1, 3, res, expected_res);
>> > +      }
>> >      }
>> >  }
>> >
>> > diff --git a/gcc/testsuite/gcc.dg/vect/pr111648.c 
>> > b/gcc/testsuite/gcc.dg/vect/pr111648.c
>> > new file mode 100644
>> > index 00000000000..093e2b02654
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.dg/vect/pr111648.c
>> > @@ -0,0 +1,23 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
>> > +
>> > +int a;
>> > +int *b = &a;
>> > +static int **c = &b;
>> > +static int d;
>> > +short e;
>> > +short f;
>> > +
>> > +_Bool foo ()
>> > +{
>> > +  f = -21;
>> > +  for (; f < -5; f++) {
>> > +    e = f ^ 3;
>> > +    d = *b;
>> > +    **c = e;
>> > +  }
>> > +
>> > +  return d == -6;
>> > +}
>> > +
>> > +/* { dg-final { scan-tree-dump "return 1" "optimized" } } */
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index 44118e799eb..40767736389 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -10692,9 +10692,8 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree 
> arg1,
>  
>        /* Ensure that the stepped sequence always selects from the same
>        input pattern.  */
> -      unsigned arg_npatterns
> -     = ((q1 & 1) == 0) ? VECTOR_CST_NPATTERNS (arg0)
> -                       : VECTOR_CST_NPATTERNS (arg1);
> +      tree arg = ((q1 & 1) == 0) ? arg0 : arg1;
> +      unsigned arg_npatterns = VECTOR_CST_NPATTERNS (arg);
>  
>        if (!multiple_p (step, arg_npatterns))
>       {
> @@ -10702,6 +10701,31 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree 
> arg1,
>           *reason = "step is not multiple of npatterns";
>         return false;
>       }
> +
> +      /* If a1 chooses base element from arg, ensure that it's a natural
> +      stepped sequence, ie, (arg[2] - arg[1]) == (arg[1] - arg[0])
> +      to preserve arg's encoding.  */
> +
> +      if (maybe_lt (r1, arg_npatterns))
> +     {
> +       unsigned HOST_WIDE_INT index;
> +       if (!r1.is_constant (&index))
> +         return false;
> +
> +       tree arg_elem0 = vector_cst_elt (arg, index);
> +       tree arg_elem1 = vector_cst_elt (arg, index + arg_npatterns);
> +       tree arg_elem2 = vector_cst_elt (arg, index + arg_npatterns * 2);
> +
> +       tree step1, step2;
> +       if (!(step1 = const_binop (MINUS_EXPR, arg_elem1, arg_elem0))
> +           || !(step2 = const_binop (MINUS_EXPR, arg_elem2, arg_elem1))
> +           || !operand_equal_p (step1, step2, 0))
> +         {
> +           if (reason)
> +             *reason = "not a natural stepped sequence";
> +           return false;
> +         }
> +     }
>      }
>  
>    return true;
> @@ -17165,7 +17189,8 @@ namespace test_fold_vec_perm_cst {
>  static tree
>  build_vec_cst_rand (machine_mode vmode, unsigned npatterns,
>                   unsigned nelts_per_pattern,
> -                 int step = 0, int threshold = 100)
> +                 int step = 0, bool natural_stepped = false,
> +                 int threshold = 100)
>  {
>    tree inner_type = lang_hooks.types.type_for_mode (GET_MODE_INNER (vmode), 
> 1);
>    tree vectype = build_vector_type_for_mode (inner_type, vmode);
> @@ -17180,17 +17205,28 @@ build_vec_cst_rand (machine_mode vmode, unsigned 
> npatterns,
>  
>    // Fill a1 for each pattern
>    for (unsigned i = 0; i < npatterns; i++)
> -    builder.quick_push (build_int_cst (inner_type, rand () % threshold));
> -
> +    {
> +      tree a1;
> +      if (natural_stepped)
> +     {
> +       tree a0 = builder[i];
> +       wide_int a0_val = wi::to_wide (a0);
> +       wide_int a1_val = a0_val + step;
> +       a1 = wide_int_to_tree (inner_type, a1_val);
> +     }
> +      else
> +     a1 = build_int_cst (inner_type, rand () % threshold);
> +      builder.quick_push (a1);
> +    }
>    if (nelts_per_pattern == 2)
>      return builder.build ();
>  
>    for (unsigned i = npatterns * 2; i < npatterns * nelts_per_pattern; i++)
>      {
>        tree prev_elem = builder[i - npatterns];
> -      int prev_elem_val = TREE_INT_CST_LOW (prev_elem);
> -      int val = prev_elem_val + step;
> -      builder.quick_push (build_int_cst (inner_type, val));
> +      wide_int prev_elem_val = wi::to_wide (prev_elem);
> +      wide_int val = prev_elem_val + step;
> +      builder.quick_push (wide_int_to_tree (inner_type, val));
>      }
>  
>    return builder.build ();
> @@ -17436,7 +17472,7 @@ test_nunits_min_2 (machine_mode vmode)
>        and step (a2 - a1) = 1, step is not a multiple of npatterns
>        in input vector. So return NULL_TREE.  */
>        {
> -     tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1);
> +     tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1, true);
>       tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1);
>       poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>  
> @@ -17456,7 +17492,7 @@ test_nunits_min_2 (machine_mode vmode)
>        Test that stepped sequence of the pattern selects from arg0.
>        res = { arg1[0], arg0[0], arg0[1], ... } // (1, 3)  */
>        {
> -     tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> +     tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1, true);
>       tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
>       poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>  
> @@ -17470,6 +17506,62 @@ test_nunits_min_2 (machine_mode vmode)
>       tree expected_res[] = { ARG1(0), ARG0(0), ARG0(1) };
>       validate_res (1, 3, res, expected_res);
>        }
> +
> +      /* Case 6: PR111648 - a1 chooses base element from input vector arg.
> +      In this case ensure that arg has a natural stepped sequence
> +      to preserve arg's encoding.
> +
> +      As a concrete example, consider:
> +      arg0: { -16, -9, -10, ... } // (1, 3)
> +      arg1: { -12, -5, -6, ... }  // (1, 3)
> +      sel = { 0, len, len + 1, ... } // (1, 3)
> +
> +      This will create res with following encoding:
> +      res = { arg0[0], arg1[0], arg1[1], ... } // (1, 3)
> +          = { -16, -12, -5, ... }
> +
> +      The step in above encoding would be: (-5) - (-12) = 7
> +      And hence res[3] would be computed as -5 + 7 = 2.
> +      instead of arg1[2], ie, -6.
> +      Ensure that valid_mask_for_fold_vec_perm_cst returns false
> +      for this case.  */
> +      {
> +     tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> +     tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
> +     poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> +
> +     vec_perm_builder builder (len, 1, 3);
> +     poly_uint64 mask_elems[] = { 0, len, len+1 };
> +     builder_push_elems (builder, mask_elems);
> +
> +     vec_perm_indices sel (builder, 2, len);
> +     const char *reason;
> +     /* FIXME: It may happen that build_vec_cst_rand may build a natural
> +        stepped pattern, even if we didn't explicitly tell it to. So folding
> +        may not always fail, but if it does, ensure that's because arg1 does
> +        not have a natural stepped sequence (and not due to other reason)  */
> +     tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, 
> &reason);
> +     if (res == NULL_TREE)
> +       ASSERT_TRUE (!strcmp (reason, "not a natural stepped sequence"));
> +      }
> +
> +      /* Case 7: Same as Case 6, except that arg1 contains natural stepped
> +      sequence and thus folding should be valid for this case.  */
> +      {
> +     tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> +     tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1, true);
> +     poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> +
> +     vec_perm_builder builder (len, 1, 3);
> +     poly_uint64 mask_elems[] = { 0, len, len+1 };
> +     builder_push_elems (builder, mask_elems);
> +
> +     vec_perm_indices sel (builder, 2, len);
> +     tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
> +
> +     tree expected_res[] = { ARG0(0), ARG1(0), ARG1(1) };
> +     validate_res (1, 3, res, expected_res);
> +      }
>      }
>  }
>  

Reply via email to