On 7 November 2011 20:35, Jakub Jelinek <[email protected]> wrote:
> Hi!
Hi,
>
> Here is an updated patch, which handles both modifier == NONE
> and modifier == NARROW for SLP, after all it wasn't that hard.
> Additionally it checks that the fndecls and various call flags
> match, and adds some testcases.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux,
> ok for trunk?
> @@ -1723,6 +1764,55 @@ vectorizable_call (gimple stmt, gimple_s
> else
> VEC_truncate (tree, vargs, 0);
>
> + if (slp_node)
> + {
> + VEC (slp_void_p, heap) *vec_defs
> + = VEC_alloc (slp_void_p, heap, nargs);
> + VEC (tree, heap) *vec_oprnds0;
> +
> + for (i = 0; i < nargs; i++)
> + VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
> + vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
> + vec_oprnds0
> + = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
> +
> + /* Arguments are ready. Create the new vector stmt. */
> + FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
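Was this FOR_EACH_VEC_ELT line left in by mistake? The explicit VEC_iterate loop quoted right below it iterates over the same vector.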
> + for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
> + i += 2)
> + {
> + size_t k;
> + VEC_truncate (tree, vargs, 0);
> + for (k = 0; k < nargs; k++)
> + {
> + VEC (tree, heap) *vec_oprndsk
> + = (VEC (tree, heap) *)
> + VEC_index (slp_void_p, vec_defs, k);
> + VEC_quick_push (tree, vargs,
> + VEC_index (tree, vec_oprndsk, i));
> + VEC_quick_push (tree, vargs,
> + VEC_index (tree, vec_oprndsk, i + 1));
> + }
> + new_stmt = gimple_build_call_vec (fndecl, vargs);
> + new_temp = make_ssa_name (vec_dest, new_stmt);
> + gimple_call_set_lhs (new_stmt, new_temp);
> + vect_finish_stmt_generation (stmt, new_stmt, gsi);
> + mark_symbols_for_renaming (new_stmt);
> + VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
> + new_stmt);
> + }
> +
> + for (i = 0; i < nargs; i++)
> + {
> + VEC (tree, heap) *vec_oprndsi
> + = (VEC (tree, heap) *)
> + VEC_index (slp_void_p, vec_defs, i);
> + VEC_free (tree, heap, vec_oprndsi);
> + }
> + VEC_free (slp_void_p, heap, vec_defs);
> + continue;
> + }
> +
> for (i = 0; i < nargs; i++)
> {
> op = gimple_call_arg (stmt, i);
Could you please rearrange the tests (separating the basic-block cases from
the loop cases) and make them actually check that the bbs/loops were vectorized?
Also, there is no need for dg-do run.
OK otherwise.
Thanks,
Ira
> --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj 2011-11-07
> 15:05:36.000000000 +0100
> +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c 2011-11-07
> 15:07:10.000000000 +0100
> @@ -0,0 +1,100 @@
> +/* { dg-do run } */
> +
> +#include "tree-vect.h"
> +
> +extern float copysignf (float, float);
> +extern float sqrtf (float);
> +extern float fabsf (float);
> +extern void abort (void);
> +float a[64], b[64], c[64], d[64];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> + a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]);
> + a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]);
> + a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]);
> + a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]);
> + a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]);
> + a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]);
> + a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]);
> + a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (int n)
> +{
> + int i;
> + for (i = 0; i < n; i++)
> + {
> + a[4 * i + 0] = copysignf (b[4 * i + 0], c[4 * i + 0]) + 1.0f + sqrtf
> (d[4 * i + 0]);
> + a[4 * i + 1] = copysignf (b[4 * i + 1], c[4 * i + 1]) + 2.0f + sqrtf
> (d[4 * i + 1]);
> + a[4 * i + 2] = copysignf (b[4 * i + 2], c[4 * i + 2]) + 3.0f + sqrtf
> (d[4 * i + 2]);
> + a[4 * i + 3] = copysignf (b[4 * i + 3], c[4 * i + 3]) + 4.0f + sqrtf
> (d[4 * i + 3]);
> + }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (int n)
> +{
> + int i;
> + for (i = 0; i < 2 * n; i++)
> + {
> + a[2 * i + 0] = copysignf (b[2 * i + 0], c[2 * i + 0]) + 1.0f + sqrtf
> (d[2 * i + 0]);
> + a[2 * i + 1] = copysignf (b[2 * i + 1], c[2 * i + 1]) + 2.0f + sqrtf
> (d[2 * i + 1]);
> + }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (void)
> +{
> + int i;
> + for (i = 0; i < 64; i++)
> + a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) int
> +main1 ()
> +{
> + int i;
> +
> + for (i = 0; i < 64; i++)
> + {
> + asm ("");
> + b[i] = (i & 1) ? -4 * i : 4 * i;
> + c[i] = (i & 2) ? -8 * i : 8 * i;
> + d[i] = i * i;
> + }
> + f1 ();
> + for (i = 0; i < 8; i++)
> + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i + i - a[i]) >= 0.0001f)
> + abort ();
> + else
> + a[i] = 131.25;
> + f2 (16);
> + for (i = 0; i < 64; i++)
> + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 3) + i - a[i]) >=
> 0.0001f)
> + abort ();
> + else
> + a[i] = 131.25;
> + f3 (16);
> + for (i = 0; i < 64; i++)
> + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 1) + i - a[i]) >=
> 0.0001f)
> + abort ();
> + else
> + a[i] = 131.25;
> + f4 ();
> + for (i = 0; i < 64; i++)
> + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f)
> + abort ();
> + return 0;
> +}
> +
> +int
> +main ()
> +{
> + check_vect ();
> + return main1 ();
> +}
> +
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c.jj 2011-11-07
> 15:09:00.000000000 +0100
> +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c 2011-11-07
> 15:11:58.000000000 +0100
> @@ -0,0 +1,166 @@
> +/* { dg-do run } */
> +
> +#include "tree-vect.h"
> +
> +extern long int lrint (double);
> +extern void abort (void);
> +long int a[64];
> +double b[64];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> + a[0] = lrint (b[0]) + 1;
> + a[1] = lrint (b[1]) + 2;
> + a[2] = lrint (b[2]) + 3;
> + a[3] = lrint (b[3]) + 4;
> + a[4] = lrint (b[4]) + 5;
> + a[5] = lrint (b[5]) + 6;
> + a[6] = lrint (b[6]) + 7;
> + a[7] = lrint (b[7]) + 8;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (int n)
> +{
> + int i;
> + for (i = 0; i < n; i++)
> + {
> + a[4 * i + 0] = lrint (b[4 * i + 0]) + 1;
> + a[4 * i + 1] = lrint (b[4 * i + 1]) + 2;
> + a[4 * i + 2] = lrint (b[4 * i + 2]) + 3;
> + a[4 * i + 3] = lrint (b[4 * i + 3]) + 4;
> + }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (int n)
> +{
> + int i;
> + for (i = 0; i < 2 * n; i++)
> + {
> + a[2 * i + 0] = lrint (b[2 * i + 0]) + 1;
> + a[2 * i + 1] = lrint (b[2 * i + 1]) + 2;
> + }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (void)
> +{
> + int i;
> + for (i = 0; i < 64; i++)
> + a[i] = lrint (b[i]) + 1;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f5 (void)
> +{
> + a[0] = lrint (b[0]);
> + a[1] = lrint (b[1]);
> + a[2] = lrint (b[2]);
> + a[3] = lrint (b[3]);
> + a[4] = lrint (b[4]);
> + a[5] = lrint (b[5]);
> + a[6] = lrint (b[6]);
> + a[7] = lrint (b[7]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f6 (int n)
> +{
> + int i;
> + for (i = 0; i < n; i++)
> + {
> + a[4 * i + 0] = lrint (b[4 * i + 0]);
> + a[4 * i + 1] = lrint (b[4 * i + 1]);
> + a[4 * i + 2] = lrint (b[4 * i + 2]);
> + a[4 * i + 3] = lrint (b[4 * i + 3]);
> + }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (int n)
> +{
> + int i;
> + for (i = 0; i < 2 * n; i++)
> + {
> + a[2 * i + 0] = lrint (b[2 * i + 0]);
> + a[2 * i + 1] = lrint (b[2 * i + 1]);
> + }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (void)
> +{
> + int i;
> + for (i = 0; i < 64; i++)
> + a[i] = lrint (b[i]);
> +}
> +
> +__attribute__((noinline, noclone)) int
> +main1 ()
> +{
> + int i;
> +
> + for (i = 0; i < 64; i++)
> + {
> + asm ("");
> + b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25;
> + }
> + f1 ();
> + for (i = 0; i < 8; i++)
> + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + i)
> + abort ();
> + else
> + a[i] = 131.25;
> + f2 (16);
> + for (i = 0; i < 64; i++)
> + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 3))
> + abort ();
> + else
> + a[i] = 131.25;
> + f3 (16);
> + for (i = 0; i < 64; i++)
> + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 1))
> + abort ();
> + else
> + a[i] = 131.25;
> + f4 ();
> + for (i = 0; i < 64; i++)
> + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1)
> + abort ();
> + else
> + a[i] = 131.25;
> + f5 ();
> + for (i = 0; i < 8; i++)
> + if (a[i] != ((i & 1) ? -4 * i : 4 * i))
> + abort ();
> + else
> + a[i] = 131.25;
> + f6 (16);
> + for (i = 0; i < 64; i++)
> + if (a[i] != ((i & 1) ? -4 * i : 4 * i))
> + abort ();
> + else
> + a[i] = 131.25;
> + f7 (16);
> + for (i = 0; i < 64; i++)
> + if (a[i] != ((i & 1) ? -4 * i : 4 * i))
> + abort ();
> + else
> + a[i] = 131.25;
> + f8 ();
> + for (i = 0; i < 64; i++)
> + if (a[i] != ((i & 1) ? -4 * i : 4 * i))
> + abort ();
> + return 0;
> +}
> +
> +int
> +main ()
> +{
> + check_vect ();
> + return main1 ();
> +}
> +
> +/* { dg-final { cleanup-tree-dump "vect" } } */
>
>
> Jakub
>