Prathamesh Kulkarni <[email protected]> writes:
> On Tue, 17 Oct 2023 at 02:40, Richard Sandiford
> <[email protected]> wrote:
>> Prathamesh Kulkarni <[email protected]> writes:
>> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
>> > index 4f8561509ff..55a6a68c16c 100644
>> > --- a/gcc/fold-const.cc
>> > +++ b/gcc/fold-const.cc
>> > @@ -10684,9 +10684,8 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0,
>> > tree arg1,
>> >
>> > /* Ensure that the stepped sequence always selects from the same
>> > input pattern. */
>> > - unsigned arg_npatterns
>> > - = ((q1 & 1) == 0) ? VECTOR_CST_NPATTERNS (arg0)
>> > - : VECTOR_CST_NPATTERNS (arg1);
>> > + tree arg = ((q1 & 1) == 0) ? arg0 : arg1;
>> > + unsigned arg_npatterns = VECTOR_CST_NPATTERNS (arg);
>> >
>> > if (!multiple_p (step, arg_npatterns))
>> > {
>> > @@ -10694,6 +10693,29 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0,
>> > tree arg1,
>> > *reason = "step is not multiple of npatterns";
>> > return false;
>> > }
>> > +
>> > + /* If a1 chooses base element from arg, ensure that it's a natural
>> > + stepped sequence, ie, (arg[2] - arg[1]) == (arg[1] - arg[0])
>> > + to preserve arg's encoding. */
>> > +
>> > + unsigned HOST_WIDE_INT index;
>> > + if (!r1.is_constant (&index))
>> > + return false;
>> > + if (index < arg_npatterns)
>> > + {
>>
>> I don't know whether it matters in practice, but I think the two conditions
>> above are more natural as:
>>
>> if (maybe_lt (r1, arg_npatterns))
>> {
>> unsigned HOST_WIDE_INT index;
>> if (!r1.is_constant (&index))
>> return false;
>>
>> ...[code below]...
>> }
>>
>> > + tree arg_elem0 = vector_cst_elt (arg, index);
>> > + tree arg_elem1 = vector_cst_elt (arg, index + arg_npatterns);
>> > + tree arg_elem2 = vector_cst_elt (arg, index + arg_npatterns * 2);
>> > +
>> > + if (!operand_equal_p (const_binop (MINUS_EXPR, arg_elem2,
>> > arg_elem1),
>> > + const_binop (MINUS_EXPR, arg_elem1,
>> > arg_elem0),
>> > + 0))
>>
>> This needs to check whether const_binop returns null. Maybe:
>>
>> tree step1, step2;
>> if (!(step1 = const_binop (MINUS_EXPR, arg_elem1, arg_elem0))
>> || !(step2 = const_binop (MINUS_EXPR, arg_elem2, arg_elem1))
>> || !operand_equal_p (step1, step2, 0))
>>
>> OK with those changes, thanks.
> Hi Richard,
> Thanks for the suggestions, updated the attached patch accordingly.
> Bootstrapped+tested with and without SVE on aarch64-linux-gnu and
> x86_64-linux-gnu.
> OK to commit ?
Yes, thanks.
Richard
>
> Thanks,
> Prathamesh
>>
>> Richard
>>
>> > + {
>> > + if (reason)
>> > + *reason = "not a natural stepped sequence";
>> > + return false;
>> > + }
>> > + }
>> > }
>> >
>> > return true;
>> > @@ -17161,7 +17183,8 @@ namespace test_fold_vec_perm_cst {
>> > static tree
>> > build_vec_cst_rand (machine_mode vmode, unsigned npatterns,
>> > unsigned nelts_per_pattern,
>> > - int step = 0, int threshold = 100)
>> > + int step = 0, bool natural_stepped = false,
>> > + int threshold = 100)
>> > {
>> > tree inner_type = lang_hooks.types.type_for_mode (GET_MODE_INNER
>> > (vmode), 1);
>> > tree vectype = build_vector_type_for_mode (inner_type, vmode);
>> > @@ -17176,17 +17199,28 @@ build_vec_cst_rand (machine_mode vmode, unsigned
>> > npatterns,
>> >
>> > // Fill a1 for each pattern
>> > for (unsigned i = 0; i < npatterns; i++)
>> > - builder.quick_push (build_int_cst (inner_type, rand () % threshold));
>> > -
>> > + {
>> > + tree a1;
>> > + if (natural_stepped)
>> > + {
>> > + tree a0 = builder[i];
>> > + wide_int a0_val = wi::to_wide (a0);
>> > + wide_int a1_val = a0_val + step;
>> > + a1 = wide_int_to_tree (inner_type, a1_val);
>> > + }
>> > + else
>> > + a1 = build_int_cst (inner_type, rand () % threshold);
>> > + builder.quick_push (a1);
>> > + }
>> > if (nelts_per_pattern == 2)
>> > return builder.build ();
>> >
>> > for (unsigned i = npatterns * 2; i < npatterns * nelts_per_pattern; i++)
>> > {
>> > tree prev_elem = builder[i - npatterns];
>> > - int prev_elem_val = TREE_INT_CST_LOW (prev_elem);
>> > - int val = prev_elem_val + step;
>> > - builder.quick_push (build_int_cst (inner_type, val));
>> > + wide_int prev_elem_val = wi::to_wide (prev_elem);
>> > + wide_int val = prev_elem_val + step;
>> > + builder.quick_push (wide_int_to_tree (inner_type, val));
>> > }
>> >
>> > return builder.build ();
>> > @@ -17432,7 +17466,7 @@ test_nunits_min_2 (machine_mode vmode)
>> > and step (a2 - a1) = 1, step is not a multiple of npatterns
>> > in input vector. So return NULL_TREE. */
>> > {
>> > - tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1);
>> > + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1, true);
>> > tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1);
>> > poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>> >
>> > @@ -17452,7 +17486,7 @@ test_nunits_min_2 (machine_mode vmode)
>> > Test that stepped sequence of the pattern selects from arg0.
>> > res = { arg1[0], arg0[0], arg0[1], ... } // (1, 3) */
>> > {
>> > - tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
>> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1, true);
>> > tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
>> > poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>> >
>> > @@ -17466,6 +17500,62 @@ test_nunits_min_2 (machine_mode vmode)
>> > tree expected_res[] = { ARG1(0), ARG0(0), ARG0(1) };
>> > validate_res (1, 3, res, expected_res);
>> > }
>> > +
>> > + /* Case 6: PR111648 - a1 chooses base element from input vector arg.
>> > + In this case ensure that arg has a natural stepped sequence
>> > + to preserve arg's encoding.
>> > +
>> > + As a concrete example, consider:
>> > + arg0: { -16, -9, -10, ... } // (1, 3)
>> > + arg1: { -12, -5, -6, ... } // (1, 3)
>> > + sel = { 0, len, len + 1, ... } // (1, 3)
>> > +
>> > + This will create res with following encoding:
>> > + res = { arg0[0], arg1[0], arg1[1], ... } // (1, 3)
>> > + = { -16, -12, -5, ... }
>> > +
>> > + The step in above encoding would be: (-5) - (-12) = 7
>> > + And hence res[3] would be computed as -5 + 7 = 2.
>> > + instead of arg1[2], ie, -6.
>> > + Ensure that valid_mask_for_fold_vec_perm_cst returns false
>> > + for this case. */
>> > + {
>> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
>> > + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
>> > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>> > +
>> > + vec_perm_builder builder (len, 1, 3);
>> > + poly_uint64 mask_elems[] = { 0, len, len+1 };
>> > + builder_push_elems (builder, mask_elems);
>> > +
>> > + vec_perm_indices sel (builder, 2, len);
>> > + const char *reason;
>> > + /* FIXME: It may happen that build_vec_cst_rand may build a natural
>> > + stepped pattern, even if we didn't explicitly tell it to. So
>> > folding
>> > + may not always fail, but if it does, ensure that's because arg1
>> > does
>> > + not have a natural stepped sequence (and not due to other reason)
>> > */
>> > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel,
>> > &reason);
>> > + if (res == NULL_TREE)
>> > + ASSERT_TRUE (!strcmp (reason, "not a natural stepped sequence"));
>> > + }
>> > +
>> > + /* Case 7: Same as Case 6, except that arg1 contains natural stepped
>> > + sequence and thus folding should be valid for this case. */
>> > + {
>> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
>> > + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1, true);
>> > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>> > +
>> > + vec_perm_builder builder (len, 1, 3);
>> > + poly_uint64 mask_elems[] = { 0, len, len+1 };
>> > + builder_push_elems (builder, mask_elems);
>> > +
>> > + vec_perm_indices sel (builder, 2, len);
>> > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
>> > +
>> > + tree expected_res[] = { ARG0(0), ARG1(0), ARG1(1) };
>> > + validate_res (1, 3, res, expected_res);
>> > + }
>> > }
>> > }
>> >
>> > diff --git a/gcc/testsuite/gcc.dg/vect/pr111648.c
>> > b/gcc/testsuite/gcc.dg/vect/pr111648.c
>> > new file mode 100644
>> > index 00000000000..093e2b02654
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.dg/vect/pr111648.c
>> > @@ -0,0 +1,23 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
>> > +
>> > +int a;
>> > +int *b = &a;
>> > +static int **c = &b;
>> > +static int d;
>> > +short e;
>> > +short f;
>> > +
>> > +_Bool foo ()
>> > +{
>> > + f = -21;
>> > + for (; f < -5; f++) {
>> > + e = f ^ 3;
>> > + d = *b;
>> > + **c = e;
>> > + }
>> > +
>> > + return d == -6;
>> > +}
>> > +
>> > +/* { dg-final { scan-tree-dump "return 1" "optimized" } } */
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index 44118e799eb..40767736389 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -10692,9 +10692,8 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree
> arg1,
>
> /* Ensure that the stepped sequence always selects from the same
> input pattern. */
> - unsigned arg_npatterns
> - = ((q1 & 1) == 0) ? VECTOR_CST_NPATTERNS (arg0)
> - : VECTOR_CST_NPATTERNS (arg1);
> + tree arg = ((q1 & 1) == 0) ? arg0 : arg1;
> + unsigned arg_npatterns = VECTOR_CST_NPATTERNS (arg);
>
> if (!multiple_p (step, arg_npatterns))
> {
> @@ -10702,6 +10701,31 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree
> arg1,
> *reason = "step is not multiple of npatterns";
> return false;
> }
> +
> + /* If a1 chooses base element from arg, ensure that it's a natural
> + stepped sequence, ie, (arg[2] - arg[1]) == (arg[1] - arg[0])
> + to preserve arg's encoding. */
> +
> + if (maybe_lt (r1, arg_npatterns))
> + {
> + unsigned HOST_WIDE_INT index;
> + if (!r1.is_constant (&index))
> + return false;
> +
> + tree arg_elem0 = vector_cst_elt (arg, index);
> + tree arg_elem1 = vector_cst_elt (arg, index + arg_npatterns);
> + tree arg_elem2 = vector_cst_elt (arg, index + arg_npatterns * 2);
> +
> + tree step1, step2;
> + if (!(step1 = const_binop (MINUS_EXPR, arg_elem1, arg_elem0))
> + || !(step2 = const_binop (MINUS_EXPR, arg_elem2, arg_elem1))
> + || !operand_equal_p (step1, step2, 0))
> + {
> + if (reason)
> + *reason = "not a natural stepped sequence";
> + return false;
> + }
> + }
> }
>
> return true;
> @@ -17165,7 +17189,8 @@ namespace test_fold_vec_perm_cst {
> static tree
> build_vec_cst_rand (machine_mode vmode, unsigned npatterns,
> unsigned nelts_per_pattern,
> - int step = 0, int threshold = 100)
> + int step = 0, bool natural_stepped = false,
> + int threshold = 100)
> {
> tree inner_type = lang_hooks.types.type_for_mode (GET_MODE_INNER (vmode),
> 1);
> tree vectype = build_vector_type_for_mode (inner_type, vmode);
> @@ -17180,17 +17205,28 @@ build_vec_cst_rand (machine_mode vmode, unsigned
> npatterns,
>
> // Fill a1 for each pattern
> for (unsigned i = 0; i < npatterns; i++)
> - builder.quick_push (build_int_cst (inner_type, rand () % threshold));
> -
> + {
> + tree a1;
> + if (natural_stepped)
> + {
> + tree a0 = builder[i];
> + wide_int a0_val = wi::to_wide (a0);
> + wide_int a1_val = a0_val + step;
> + a1 = wide_int_to_tree (inner_type, a1_val);
> + }
> + else
> + a1 = build_int_cst (inner_type, rand () % threshold);
> + builder.quick_push (a1);
> + }
> if (nelts_per_pattern == 2)
> return builder.build ();
>
> for (unsigned i = npatterns * 2; i < npatterns * nelts_per_pattern; i++)
> {
> tree prev_elem = builder[i - npatterns];
> - int prev_elem_val = TREE_INT_CST_LOW (prev_elem);
> - int val = prev_elem_val + step;
> - builder.quick_push (build_int_cst (inner_type, val));
> + wide_int prev_elem_val = wi::to_wide (prev_elem);
> + wide_int val = prev_elem_val + step;
> + builder.quick_push (wide_int_to_tree (inner_type, val));
> }
>
> return builder.build ();
> @@ -17436,7 +17472,7 @@ test_nunits_min_2 (machine_mode vmode)
> and step (a2 - a1) = 1, step is not a multiple of npatterns
> in input vector. So return NULL_TREE. */
> {
> - tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1);
> + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1, true);
> tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1);
> poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>
> @@ -17456,7 +17492,7 @@ test_nunits_min_2 (machine_mode vmode)
> Test that stepped sequence of the pattern selects from arg0.
> res = { arg1[0], arg0[0], arg0[1], ... } // (1, 3) */
> {
> - tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1, true);
> tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
> poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
>
> @@ -17470,6 +17506,62 @@ test_nunits_min_2 (machine_mode vmode)
> tree expected_res[] = { ARG1(0), ARG0(0), ARG0(1) };
> validate_res (1, 3, res, expected_res);
> }
> +
> + /* Case 6: PR111648 - a1 chooses base element from input vector arg.
> + In this case ensure that arg has a natural stepped sequence
> + to preserve arg's encoding.
> +
> + As a concrete example, consider:
> + arg0: { -16, -9, -10, ... } // (1, 3)
> + arg1: { -12, -5, -6, ... } // (1, 3)
> + sel = { 0, len, len + 1, ... } // (1, 3)
> +
> + This will create res with following encoding:
> + res = { arg0[0], arg1[0], arg1[1], ... } // (1, 3)
> + = { -16, -12, -5, ... }
> +
> + The step in above encoding would be: (-5) - (-12) = 7
> + And hence res[3] would be computed as -5 + 7 = 2.
> + instead of arg1[2], ie, -6.
> + Ensure that valid_mask_for_fold_vec_perm_cst returns false
> + for this case. */
> + {
> + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
> + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> +
> + vec_perm_builder builder (len, 1, 3);
> + poly_uint64 mask_elems[] = { 0, len, len+1 };
> + builder_push_elems (builder, mask_elems);
> +
> + vec_perm_indices sel (builder, 2, len);
> + const char *reason;
> + /* FIXME: It may happen that build_vec_cst_rand may build a natural
> + stepped pattern, even if we didn't explicitly tell it to. So folding
> + may not always fail, but if it does, ensure that's because arg1 does
> + not have a natural stepped sequence (and not due to other reason) */
> + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel,
> &reason);
> + if (res == NULL_TREE)
> + ASSERT_TRUE (!strcmp (reason, "not a natural stepped sequence"));
> + }
> +
> + /* Case 7: Same as Case 6, except that arg1 contains natural stepped
> + sequence and thus folding should be valid for this case. */
> + {
> + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1, true);
> + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> +
> + vec_perm_builder builder (len, 1, 3);
> + poly_uint64 mask_elems[] = { 0, len, len+1 };
> + builder_push_elems (builder, mask_elems);
> +
> + vec_perm_indices sel (builder, 2, len);
> + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
> +
> + tree expected_res[] = { ARG0(0), ARG1(0), ARG1(1) };
> + validate_res (1, 3, res, expected_res);
> + }
> }
> }
>