On 8 November 2011 11:32, Jakub Jelinek <ja...@redhat.com> wrote:
> On Tue, Nov 08, 2011 at 10:03:23AM +0200, Ira Rosen wrote:
>> The second option would be nicer.
> ...
>
> Thanks. Here is an updated patch, will bootstrap/regtest it now.
> Ok for trunk if it passes?
Yes. Thanks, Ira > > 2011-11-08 Jakub Jelinek <ja...@redhat.com> > > * tree-vect-stmts.c (vectorizable_call): Add SLP_NODE argument. > Handle vectorization of SLP calls. > (vect_analyze_stmt): Adjust caller, add call to it for SLP too. > (vect_transform_stmt): Adjust vectorizable_call caller, remove > assertion. > * tree-vect-slp.c (vect_get_and_check_slp_defs): For calls start > with op_idx 3. > (vect_build_slp_tree): Allow CALL_EXPR. > > * lib/target-supports.exp (check_effective_target_vect_call_sqrtf, > check_effective_target_vect_call_copysignf, > check_effective_target_vect_call_lrint): New procedures. > * gcc.dg/vect/vect.exp: Run fast-math-bb-slp* tests using > $VECT_SLP_CFLAGS with -ffast-math. > * gcc.dg/vect/fast-math-vect-call-1.c: New test. > * gcc.dg/vect/fast-math-vect-call-2.c: New test. > * gcc.dg/vect/fast-math-bb-slp-call-1.c: New test. > * gcc.dg/vect/fast-math-bb-slp-call-2.c: New test. > > --- gcc/tree-vect-slp.c.jj 2011-11-07 20:32:03.000000000 +0100 > +++ gcc/tree-vect-slp.c 2011-11-08 09:28:12.000000000 +0100 > @@ -202,7 +202,10 @@ vect_get_and_check_slp_defs (loop_vec_in > loop = LOOP_VINFO_LOOP (loop_vinfo); > > if (is_gimple_call (stmt)) > - number_of_oprnds = gimple_call_num_args (stmt); > + { > + number_of_oprnds = gimple_call_num_args (stmt); > + op_idx = 3; > + } > else if (is_gimple_assign (stmt)) > { > number_of_oprnds = gimple_num_ops (stmt) - 1; > @@ -558,7 +561,25 @@ vect_build_slp_tree (loop_vec_info loop_ > ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); > > if (is_gimple_call (stmt)) > - rhs_code = CALL_EXPR; > + { > + rhs_code = CALL_EXPR; > + if (gimple_call_internal_p (stmt) > + || gimple_call_tail_p (stmt) > + || gimple_call_noreturn_p (stmt) > + || !gimple_call_nothrow_p (stmt) > + || gimple_call_chain (stmt)) > + { > + if (vect_print_dump_info (REPORT_SLP)) > + { > + fprintf (vect_dump, > + "Build SLP failed: unsupported call type "); > + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); > + } > + > + 
vect_free_oprnd_info (&oprnds_info, true); > + return false; > + } > + } > else > rhs_code = gimple_assign_rhs_code (stmt); > > @@ -653,6 +674,27 @@ vect_build_slp_tree (loop_vec_info loop_ > vect_free_oprnd_info (&oprnds_info, true); > return false; > } > + > + if (rhs_code == CALL_EXPR) > + { > + gimple first_stmt = VEC_index (gimple, stmts, 0); > + if (gimple_call_num_args (stmt) != nops > + || !operand_equal_p (gimple_call_fn (first_stmt), > + gimple_call_fn (stmt), 0) > + || gimple_call_fntype (first_stmt) > + != gimple_call_fntype (stmt)) > + { > + if (vect_print_dump_info (REPORT_SLP)) > + { > + fprintf (vect_dump, > + "Build SLP failed: different calls in "); > + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); > + } > + > + vect_free_oprnd_info (&oprnds_info, true); > + return false; > + } > + } > } > > /* Strided store or load. */ > @@ -786,7 +828,8 @@ vect_build_slp_tree (loop_vec_info loop_ > /* Not memory operation. */ > if (TREE_CODE_CLASS (rhs_code) != tcc_binary > && TREE_CODE_CLASS (rhs_code) != tcc_unary > - && rhs_code != COND_EXPR) > + && rhs_code != COND_EXPR > + && rhs_code != CALL_EXPR) > { > if (vect_print_dump_info (REPORT_SLP)) > { > --- gcc/tree-vect-stmts.c.jj 2011-11-07 20:32:09.000000000 +0100 > +++ gcc/tree-vect-stmts.c 2011-11-08 09:28:55.000000000 +0100 > @@ -1521,7 +1521,8 @@ vectorizable_function (gimple call, tree > Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ > > static bool > -vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt) > +vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, > + slp_tree slp_node) > { > tree vec_dest; > tree scalar_dest; > @@ -1532,6 +1533,7 @@ vectorizable_call (gimple stmt, gimple_s > int nunits_in; > int nunits_out; > loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); > + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); > tree fndecl, new_temp, def, rhs_type; > gimple def_stmt; > enum vect_def_type dt[3] > @@ -1543,19 +1545,12 @@ vectorizable_call (gimple stmt, gimple_s > size_t i, nargs; > tree lhs; > > - /* FORNOW: unsupported in basic block SLP. */ > - gcc_assert (loop_vinfo); > - > - if (!STMT_VINFO_RELEVANT_P (stmt_info)) > + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) > return false; > > if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) > return false; > > - /* FORNOW: SLP not supported. */ > - if (STMT_SLP_TYPE (stmt_info)) > - return false; > - > /* Is STMT a vectorizable call? 
*/ > if (!is_gimple_call (stmt)) > return false; > @@ -1596,7 +1591,7 @@ vectorizable_call (gimple stmt, gimple_s > if (!rhs_type) > rhs_type = TREE_TYPE (op); > > - if (!vect_is_simple_use_1 (op, loop_vinfo, NULL, > + if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo, > &def_stmt, &def, &dt[i], &opvectype)) > { > if (vect_print_dump_info (REPORT_DETAILS)) > @@ -1658,7 +1653,9 @@ vectorizable_call (gimple stmt, gimple_s > > gcc_assert (!gimple_vuse (stmt)); > > - if (modifier == NARROW) > + if (slp_node || PURE_SLP_STMT (stmt_info)) > + ncopies = 1; > + else if (modifier == NARROW) > ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; > else > ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; > @@ -1697,6 +1694,50 @@ vectorizable_call (gimple stmt, gimple_s > else > VEC_truncate (tree, vargs, 0); > > + if (slp_node) > + { > + VEC (slp_void_p, heap) *vec_defs > + = VEC_alloc (slp_void_p, heap, nargs); > + VEC (tree, heap) *vec_oprnds0; > + > + for (i = 0; i < nargs; i++) > + VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); > + vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); > + vec_oprnds0 > + = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); > + > + /* Arguments are ready. Create the new vector stmt. 
*/ > + FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0) > + { > + size_t k; > + for (k = 0; k < nargs; k++) > + { > + VEC (tree, heap) *vec_oprndsk > + = (VEC (tree, heap) *) > + VEC_index (slp_void_p, vec_defs, k); > + VEC_replace (tree, vargs, k, > + VEC_index (tree, vec_oprndsk, i)); > + } > + new_stmt = gimple_build_call_vec (fndecl, vargs); > + new_temp = make_ssa_name (vec_dest, new_stmt); > + gimple_call_set_lhs (new_stmt, new_temp); > + vect_finish_stmt_generation (stmt, new_stmt, gsi); > + mark_symbols_for_renaming (new_stmt); > + VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), > + new_stmt); > + } > + > + for (i = 0; i < nargs; i++) > + { > + VEC (tree, heap) *vec_oprndsi > + = (VEC (tree, heap) *) > + VEC_index (slp_void_p, vec_defs, i); > + VEC_free (tree, heap, vec_oprndsi); > + } > + VEC_free (slp_void_p, heap, vec_defs); > + continue; > + } > + > for (i = 0; i < nargs; i++) > { > op = gimple_call_arg (stmt, i); > @@ -1739,6 +1780,54 @@ vectorizable_call (gimple stmt, gimple_s > else > VEC_truncate (tree, vargs, 0); > > + if (slp_node) > + { > + VEC (slp_void_p, heap) *vec_defs > + = VEC_alloc (slp_void_p, heap, nargs); > + VEC (tree, heap) *vec_oprnds0; > + > + for (i = 0; i < nargs; i++) > + VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); > + vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); > + vec_oprnds0 > + = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); > + > + /* Arguments are ready. Create the new vector stmt. 
*/ > + for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0); > + i += 2) > + { > + size_t k; > + VEC_truncate (tree, vargs, 0); > + for (k = 0; k < nargs; k++) > + { > + VEC (tree, heap) *vec_oprndsk > + = (VEC (tree, heap) *) > + VEC_index (slp_void_p, vec_defs, k); > + VEC_quick_push (tree, vargs, > + VEC_index (tree, vec_oprndsk, i)); > + VEC_quick_push (tree, vargs, > + VEC_index (tree, vec_oprndsk, i + 1)); > + } > + new_stmt = gimple_build_call_vec (fndecl, vargs); > + new_temp = make_ssa_name (vec_dest, new_stmt); > + gimple_call_set_lhs (new_stmt, new_temp); > + vect_finish_stmt_generation (stmt, new_stmt, gsi); > + mark_symbols_for_renaming (new_stmt); > + VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), > + new_stmt); > + } > + > + for (i = 0; i < nargs; i++) > + { > + VEC (tree, heap) *vec_oprndsi > + = (VEC (tree, heap) *) > + VEC_index (slp_void_p, vec_defs, i); > + VEC_free (tree, heap, vec_oprndsi); > + } > + VEC_free (slp_void_p, heap, vec_defs); > + continue; > + } > + > for (i = 0; i < nargs; i++) > { > op = gimple_call_arg (stmt, i); > @@ -1804,7 +1893,8 @@ vectorizable_call (gimple stmt, gimple_s > lhs = gimple_call_lhs (stmt); > new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); > set_vinfo_for_stmt (new_stmt, stmt_info); > - set_vinfo_for_stmt (stmt, NULL); > + if (!slp_node) > + set_vinfo_for_stmt (stmt, NULL); > STMT_VINFO_STMT (stmt_info) = new_stmt; > gsi_replace (gsi, new_stmt, false); > SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; > @@ -5265,7 +5355,7 @@ vect_analyze_stmt (gimple stmt, bool *ne > || vectorizable_operation (stmt, NULL, NULL, NULL) > || vectorizable_assignment (stmt, NULL, NULL, NULL) > || vectorizable_load (stmt, NULL, NULL, NULL, NULL) > - || vectorizable_call (stmt, NULL, NULL) > + || vectorizable_call (stmt, NULL, NULL, NULL) > || vectorizable_store (stmt, NULL, NULL, NULL) > || vectorizable_reduction (stmt, NULL, NULL, NULL) > || vectorizable_condition (stmt, NULL, NULL, NULL, 0, 
NULL)); > @@ -5277,6 +5367,7 @@ vect_analyze_stmt (gimple stmt, bool *ne > || vectorizable_operation (stmt, NULL, NULL, node) > || vectorizable_assignment (stmt, NULL, NULL, node) > || vectorizable_load (stmt, NULL, NULL, node, NULL) > + || vectorizable_call (stmt, NULL, NULL, node) > || vectorizable_store (stmt, NULL, NULL, node) > || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); > } > @@ -5391,8 +5482,7 @@ vect_transform_stmt (gimple stmt, gimple > break; > > case call_vec_info_type: > - gcc_assert (!slp_node); > - done = vectorizable_call (stmt, gsi, &vec_stmt); > + done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node); > stmt = gsi_stmt (*gsi); > break; > > --- gcc/testsuite/lib/target-supports.exp.jj 2011-11-08 09:26:58.000000000 > +0100 > +++ gcc/testsuite/lib/target-supports.exp 2011-11-08 10:15:38.000000000 > +0100 > @@ -3520,6 +3520,58 @@ proc check_effective_target_vect64 { } { > return $et_vect64_saved > } > > +# Return 1 if the target supports vector copysignf calls. > + > +proc check_effective_target_vect_call_copysignf { } { > + global et_vect_call_copysignf_saved > + > + if [info exists et_vect_call_copysignf_saved] { > + verbose "check_effective_target_vect_call_copysignf: using cached > result" 2 > + } else { > + set et_vect_call_copysignf_saved 0 > + if { [istarget i?86-*-*] > + || [istarget x86_64-*-*] > + || [istarget powerpc*-*-*] } { > + set et_vect_call_copysignf_saved 1 > + } > + } > + > + verbose "check_effective_target_vect_call_copysignf: returning > $et_vect_call_copysignf_saved" 2 > + return $et_vect_call_copysignf_saved > +} > + > +# Return 1 if the target supports vector sqrtf calls. 
> + > +proc check_effective_target_vect_call_sqrtf { } { > + global et_vect_call_sqrtf_saved > + > + if [info exists et_vect_call_sqrtf_saved] { > + verbose "check_effective_target_vect_call_sqrtf: using cached result" > 2 > + } else { > + set et_vect_call_sqrtf_saved 0 > + if { [istarget i?86-*-*] > + || [istarget x86_64-*-*] > + || ([istarget powerpc*-*-*] && [check_vsx_hw_available]) } { > + set et_vect_call_sqrtf_saved 1 > + } > + } > + > + verbose "check_effective_target_vect_call_sqrtf: returning > $et_vect_call_sqrtf_saved" 2 > + return $et_vect_call_sqrtf_saved > +} > + > +# Return 1 if the target supports vector lrint calls. > + > +proc check_effective_target_vect_call_lrint { } { > + set et_vect_call_lrint 0 > + if { ([istarget i?86-*-*] || [istarget x86_64-*-*]) && > [check_effective_target_ilp32] } { > + set et_vect_call_lrint 1 > + } > + > + verbose "check_effective_target_vect_call_lrint: returning > $et_vect_call_lrint" 2 > + return $et_vect_call_lrint > +} > + > # Return 1 if the target supports section-anchors > > proc check_effective_target_section_anchors { } { > --- gcc/testsuite/gcc.dg/vect/vect.exp.jj 2011-10-24 12:21:08.000000000 > +0200 > +++ gcc/testsuite/gcc.dg/vect/vect.exp 2011-11-08 10:09:27.000000000 +0100 > @@ -104,9 +104,15 @@ dg-runtest [lsort [glob -nocomplain $src > # -ffast-math tests > set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS > lappend DEFAULT_VECTCFLAGS "-ffast-math" > -dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-*.\[cS\]]] \ > +dg-runtest [lsort [glob -nocomplain > $srcdir/$subdir/fast-math-\[ipsv\]*.\[cS\]]] \ > "" $DEFAULT_VECTCFLAGS > > +# -ffast-math SLP tests > +set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS > +lappend VECT_SLP_CFLAGS "-ffast-math" > +dg-runtest [lsort [glob -nocomplain > $srcdir/$subdir/fast-math-bb-slp-*.\[cS\]]] \ > + "" $VECT_SLP_CFLAGS > + > # -fno-fast-math tests > set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS > lappend DEFAULT_VECTCFLAGS "-fno-fast-math" > --- 
gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj 2011-11-08 > 09:28:12.000000000 +0100 > +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c 2011-11-08 > 09:57:19.000000000 +0100 > @@ -0,0 +1,81 @@ > +#include "tree-vect.h" > + > +extern float copysignf (float, float); > +extern float sqrtf (float); > +extern float fabsf (float); > +extern void abort (void); > +float a[64], b[64], c[64], d[64]; > + > +__attribute__((noinline, noclone)) void > +f1 (int n) > +{ > + int i; > + for (i = 0; i < n; i++) > + { > + a[4 * i + 0] = copysignf (b[4 * i + 0], c[4 * i + 0]) + 1.0f + sqrtf > (d[4 * i + 0]); > + a[4 * i + 1] = copysignf (b[4 * i + 1], c[4 * i + 1]) + 2.0f + sqrtf > (d[4 * i + 1]); > + a[4 * i + 2] = copysignf (b[4 * i + 2], c[4 * i + 2]) + 3.0f + sqrtf > (d[4 * i + 2]); > + a[4 * i + 3] = copysignf (b[4 * i + 3], c[4 * i + 3]) + 4.0f + sqrtf > (d[4 * i + 3]); > + } > +} > + > +__attribute__((noinline, noclone)) void > +f2 (int n) > +{ > + int i; > + for (i = 0; i < 2 * n; i++) > + { > + a[2 * i + 0] = copysignf (b[2 * i + 0], c[2 * i + 0]) + 1.0f + sqrtf > (d[2 * i + 0]); > + a[2 * i + 1] = copysignf (b[2 * i + 1], c[2 * i + 1]) + 2.0f + sqrtf > (d[2 * i + 1]); > + } > +} > + > +__attribute__((noinline, noclone)) void > +f3 (void) > +{ > + int i; > + for (i = 0; i < 64; i++) > + a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]); > +} > + > +__attribute__((noinline, noclone)) int > +main1 () > +{ > + int i; > + > + for (i = 0; i < 64; i++) > + { > + asm (""); > + b[i] = (i & 1) ? -4 * i : 4 * i; > + c[i] = (i & 2) ? -8 * i : 8 * i; > + d[i] = i * i; > + } > + f1 (16); > + for (i = 0; i < 64; i++) > + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 3) + i - a[i]) >= > 0.0001f) > + abort (); > + else > + a[i] = 131.25; > + f2 (16); > + for (i = 0; i < 64; i++) > + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 1) + i - a[i]) >= > 0.0001f) > + abort (); > + else > + a[i] = 131.25; > + f3 (); > + for (i = 0; i < 64; i++) > + if (fabsf (((i & 2) ? 
-4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f) > + abort (); > + return 0; > +} > + > +int > +main () > +{ > + check_vect (); > + return main1 (); > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target > { vect_call_copysignf && vect_call_sqrtf } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" > { target { vect_call_copysignf && vect_call_sqrtf } } } } */ > +/* { dg-final { cleanup-tree-dump "vect" } } */ > --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c.jj 2011-11-08 > 09:28:12.000000000 +0100 > +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c 2011-11-08 > 10:03:37.000000000 +0100 > @@ -0,0 +1,128 @@ > +#include "tree-vect.h" > + > +extern long int lrint (double); > +extern void abort (void); > +long int a[64]; > +double b[64]; > + > +__attribute__((noinline, noclone)) void > +f1 (int n) > +{ > + int i; > + for (i = 0; i < n; i++) > + { > + a[4 * i + 0] = lrint (b[4 * i + 0]) + 1; > + a[4 * i + 1] = lrint (b[4 * i + 1]) + 2; > + a[4 * i + 2] = lrint (b[4 * i + 2]) + 3; > + a[4 * i + 3] = lrint (b[4 * i + 3]) + 4; > + } > +} > + > +__attribute__((noinline, noclone)) void > +f2 (int n) > +{ > + int i; > + for (i = 0; i < 2 * n; i++) > + { > + a[2 * i + 0] = lrint (b[2 * i + 0]) + 1; > + a[2 * i + 1] = lrint (b[2 * i + 1]) + 2; > + } > +} > + > +__attribute__((noinline, noclone)) void > +f3 (void) > +{ > + int i; > + for (i = 0; i < 64; i++) > + a[i] = lrint (b[i]) + 1; > +} > + > +__attribute__((noinline, noclone)) void > +f4 (int n) > +{ > + int i; > + for (i = 0; i < n; i++) > + { > + a[4 * i + 0] = lrint (b[4 * i + 0]); > + a[4 * i + 1] = lrint (b[4 * i + 1]); > + a[4 * i + 2] = lrint (b[4 * i + 2]); > + a[4 * i + 3] = lrint (b[4 * i + 3]); > + } > +} > + > +__attribute__((noinline, noclone)) void > +f5 (int n) > +{ > + int i; > + for (i = 0; i < 2 * n; i++) > + { > + a[2 * i + 0] = lrint (b[2 * i + 0]); > + a[2 * i + 1] = lrint (b[2 * i + 1]); > + } > +} > + > 
+__attribute__((noinline, noclone)) void > +f6 (void) > +{ > + int i; > + for (i = 0; i < 64; i++) > + a[i] = lrint (b[i]); > +} > + > +__attribute__((noinline, noclone)) int > +main1 () > +{ > + int i; > + > + for (i = 0; i < 64; i++) > + { > + asm (""); > + b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25; > + } > + f1 (16); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 3)) > + abort (); > + else > + a[i] = 131.25; > + f2 (16); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 1)) > + abort (); > + else > + a[i] = 131.25; > + f3 (); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1) > + abort (); > + else > + a[i] = 131.25; > + f4 (16); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) > + abort (); > + else > + a[i] = 131.25; > + f5 (16); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) > + abort (); > + else > + a[i] = 131.25; > + f6 (); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? 
-4 * i : 4 * i)) > + abort (); > + return 0; > +} > + > +int > +main () > +{ > + check_vect (); > + return main1 (); > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 6 "vect" { target > vect_call_lrint } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" > { target vect_call_lrint } } } */ > +/* { dg-final { cleanup-tree-dump "vect" } } */ > --- gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-1.c.jj 2011-11-08 > 09:46:00.000000000 +0100 > +++ gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-1.c 2011-11-08 > 09:49:49.000000000 +0100 > @@ -0,0 +1,49 @@ > +#include "tree-vect.h" > + > +extern float copysignf (float, float); > +extern float sqrtf (float); > +extern float fabsf (float); > +extern void abort (void); > +float a[64], b[64], c[64], d[64]; > + > +__attribute__((noinline, noclone)) void > +f1 (void) > +{ > + a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]); > + a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]); > + a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]); > + a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]); > + a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]); > + a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]); > + a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]); > + a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]); > +} > + > +__attribute__((noinline, noclone)) int > +main1 () > +{ > + int i; > + > + for (i = 0; i < 8; i++) > + { > + asm (""); > + b[i] = (i & 1) ? -4 * i : 4 * i; > + c[i] = (i & 2) ? -8 * i : 8 * i; > + d[i] = i * i; > + } > + f1 (); > + for (i = 0; i < 8; i++) > + if (fabsf (((i & 2) ? 
-4 * i : 4 * i) + 1 + i + i - a[i]) >= 0.0001f) > + abort (); > + return 0; > +} > + > +int > +main () > +{ > + check_vect (); > + return main1 (); > +} > + > +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 > "slp" { target { vect_call_copysignf && vect_call_sqrtf } } } } */ > +/* { dg-final { cleanup-tree-dump "slp" } } */ > --- gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-2.c.jj 2011-11-08 > 09:46:04.000000000 +0100 > +++ gcc/testsuite/gcc.dg/vect/fast-math-bb-slp-call-2.c 2011-11-08 > 10:11:20.000000000 +0100 > @@ -0,0 +1,65 @@ > +#include "tree-vect.h" > + > +extern long int lrint (double); > +extern void abort (void); > +long int a[64]; > +double b[64]; > + > +__attribute__((noinline, noclone)) void > +f1 (void) > +{ > + a[0] = lrint (b[0]) + 1; > + a[1] = lrint (b[1]) + 2; > + a[2] = lrint (b[2]) + 3; > + a[3] = lrint (b[3]) + 4; > + a[4] = lrint (b[4]) + 5; > + a[5] = lrint (b[5]) + 6; > + a[6] = lrint (b[6]) + 7; > + a[7] = lrint (b[7]) + 8; > +} > + > +__attribute__((noinline, noclone)) void > +f2 (void) > +{ > + a[0] = lrint (b[0]); > + a[1] = lrint (b[1]); > + a[2] = lrint (b[2]); > + a[3] = lrint (b[3]); > + a[4] = lrint (b[4]); > + a[5] = lrint (b[5]); > + a[6] = lrint (b[6]); > + a[7] = lrint (b[7]); > +} > + > +__attribute__((noinline, noclone)) int > +main1 () > +{ > + int i; > + > + for (i = 0; i < 8; i++) > + { > + asm (""); > + b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25; > + } > + f1 (); > + for (i = 0; i < 8; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + i) > + abort (); > + else > + a[i] = 131.25; > + f2 (); > + for (i = 0; i < 8; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) > + abort (); > + return 0; > +} > + > +int > +main () > +{ > + check_vect (); > + return main1 (); > +} > + > +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 2 > "slp" { target vect_call_lrint } } } */ > +/* { dg-final { cleanup-tree-dump "slp" } } */ > > > Jakub >