On Wed, 27 Nov 2013, Jakub Jelinek wrote:
> On Wed, Nov 27, 2013 at 10:53:56AM +0100, Richard Biener wrote:
> > Hmm. I'm still thinking that we should handle this during the regular
> > transform step.
>
> I wonder if it can't be done instead just in vectorizable_load,
> if LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) and the load is
> invariant, just emit the (broadcasted) load not inside of the loop, but on
> the loop preheader edge.
So this implements this suggestion, XFAILing the no longer handled cases.
For example we get
_94 = *b_8(D);
vect_cst_.18_95 = {_94, _94, _94, _94};
_99 = prolog_loop_adjusted_niters.9_132 * 4;
vectp_a.22_98 = a_6(D) + _99;
ivtmp.43_77 = (unsigned long) vectp_a.22_98;
<bb 13>:
# ivtmp.41_67 = PHI <ivtmp.41_70(3), 0(12)>
# ivtmp.43_71 = PHI <ivtmp.43_69(3), ivtmp.43_77(12)>
vect__10.19_97 = vect_cst_.18_95 + { 1, 1, 1, 1 };
_76 = (void *) ivtmp.43_71;
MEM[base: _76, offset: 0B] = vect__10.19_97;
...
instead of having hoisted *b_8 + 1 as scalar computation. Not sure
why LIM doesn't hoist the vector variant later.
vect__10.19_97 = vect_cst_.18_95 + vect_cst_.20_96;
invariant up to level 1, cost 1.
ah, the cost thing. Should be "improved" to see that hoisting
reduces the number of live SSA names in the loop.
Eventually lower_vector_ssa could optimize vector to scalar
code again ... (ick).
Bootstrap / regtest running on x86_64.
Comments?
Thanks,
Richard.
2014-01-13 Richard Biener <[email protected]>
PR tree-optimization/58921
PR tree-optimization/59006
* tree-vect-loop-manip.c (vect_loop_versioning): Remove code
hoisting invariant stmts.
* tree-vect-stmts.c (vectorizable_load): Insert the splat of
invariant loads on the preheader edge if possible.
* gcc.dg/torture/pr58921.c: New testcase.
* gcc.dg/torture/pr59006.c: Likewise.
* gcc.dg/vect/pr58508.c: XFAIL no longer handled cases.
Index: gcc/tree-vect-loop-manip.c
===================================================================
*** gcc/tree-vect-loop-manip.c (revision 206576)
--- gcc/tree-vect-loop-manip.c (working copy)
*************** vect_loop_versioning (loop_vec_info loop
*** 2435,2507 ****
}
}
-
- /* Extract load statements on memrefs with zero-stride accesses. */
-
- if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
- {
- /* In the loop body, we iterate each statement to check if it is a load.
- Then we check the DR_STEP of the data reference. If DR_STEP is zero,
- then we will hoist the load statement to the loop preheader. */
-
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
- int nbbs = loop->num_nodes;
-
- for (int i = 0; i < nbbs; ++i)
- {
- for (gimple_stmt_iterator si = gsi_start_bb (bbs[i]);
- !gsi_end_p (si);)
- {
- gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
-
- if (is_gimple_assign (stmt)
- && (!dr
- || (DR_IS_READ (dr) && integer_zerop (DR_STEP (dr)))))
- {
- bool hoist = true;
- ssa_op_iter iter;
- tree var;
-
- /* We hoist a statement if all SSA uses in it are defined
- outside of the loop. */
- FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
- {
- gimple def = SSA_NAME_DEF_STMT (var);
- if (!gimple_nop_p (def)
- && flow_bb_inside_loop_p (loop, gimple_bb (def)))
- {
- hoist = false;
- break;
- }
- }
-
- if (hoist)
- {
- if (dr)
- gimple_set_vuse (stmt, NULL);
-
- gsi_remove (&si, false);
- gsi_insert_on_edge_immediate (loop_preheader_edge (loop),
- stmt);
-
- if (dump_enabled_p ())
- {
- dump_printf_loc
- (MSG_NOTE, vect_location,
- "hoisting out of the vectorized loop: ");
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
- dump_printf (MSG_NOTE, "\n");
- }
- continue;
- }
- }
- gsi_next (&si);
- }
- }
- }
-
/* End loop-exit-fixes after versioning. */
if (cond_expr_stmt_list)
--- 2435,2440 ----
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c (revision 206576)
--- gcc/tree-vect-stmts.c (working copy)
*************** vectorizable_load (gimple stmt, gimple_s
*** 6368,6378 ****
/* 4. Handle invariant-load. */
if (inv_p && !bb_vinfo)
{
- gimple_stmt_iterator gsi2 = *gsi;
gcc_assert (!grouped_load);
! gsi_next (&gsi2);
! new_temp = vect_init_vector (stmt, scalar_dest,
! vectype, &gsi2);
new_stmt = SSA_NAME_DEF_STMT (new_temp);
}
--- 6368,6402 ----
/* 4. Handle invariant-load. */
if (inv_p && !bb_vinfo)
{
gcc_assert (!grouped_load);
! /* If we have versioned for aliasing then we are sure
! this is a loop invariant load and thus we can insert
! it on the preheader edge. */
! if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
! {
! if (dump_enabled_p ())
! {
! dump_printf_loc (MSG_NOTE, vect_location,
! "hoisting out of the vectorized "
! "loop: ");
! dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
! dump_printf (MSG_NOTE, "\n");
! }
! tree tem = copy_ssa_name (scalar_dest, NULL);
! gsi_insert_on_edge_immediate
! (loop_preheader_edge (loop),
! gimple_build_assign (tem,
! unshare_expr
! (gimple_assign_rhs1 (stmt))));
! new_temp = vect_init_vector (stmt, tem, vectype, NULL);
! }
! else
! {
! gimple_stmt_iterator gsi2 = *gsi;
! gsi_next (&gsi2);
! new_temp = vect_init_vector (stmt, scalar_dest,
! vectype, &gsi2);
! }
new_stmt = SSA_NAME_DEF_STMT (new_temp);
}
Index: gcc/testsuite/gcc.dg/torture/pr58921.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr58921.c (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr58921.c (working copy)
***************
*** 0 ****
--- 1,17 ----
+ /* { dg-do compile } */
+
+ int a[7];
+ int b;
+
+ void
+ fn1 ()
+ {
+ for (; b; b++)
+ a[b] = ((a[b] <= 0) == (a[0] != 0));
+ }
+
+ int
+ main ()
+ {
+ return 0;
+ }
Index: gcc/testsuite/gcc.dg/torture/pr59006.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr59006.c (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr59006.c (working copy)
***************
*** 0 ****
--- 1,13 ----
+ /* { dg-do compile } */
+
+ int a[8], b;
+ void fn1(void)
+ {
+ int c;
+ for (; b; b++)
+ {
+ int d = a[b];
+ c = a[0] ? d : 0;
+ a[b] = c;
+ }
+ }
Index: gcc/testsuite/gcc.dg/vect/pr58508.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr58508.c (revision 206576)
--- gcc/testsuite/gcc.dg/vect/pr58508.c (working copy)
*************** void test5 (int* a, int* b)
*** 66,70 ****
}
}
! /* { dg-final { scan-tree-dump-times "hoist" 8 "vect" { xfail vect_no_align }
} } */
/* { dg-final { cleanup-tree-dump "vect" } } */
--- 66,71 ----
}
}
! /* { dg-final { scan-tree-dump-times "hoist" 8 "vect" { xfail *-*-* } } } */
! /* { dg-final { scan-tree-dump-times "hoist" 3 "vect" { xfail vect_no_align }
} } */
/* { dg-final { cleanup-tree-dump "vect" } } */