The following uses SLP_TREE_REDUC_IDX instead of STMT_VINFO_REDUC_IDX where it looks more appropriate.
Bootstrapped and tested on x86_64-unknown-linux-gnu.
Tamar, can you test 1+2 (separately)? Possibly also the full stack
if the first part succeeds.
Thanks,
Richard.
* tree-vect-loop.cc (vect_create_epilog_for_reduction):
Use SLP_TREE_REDUC_IDX for following the SLP graph and
for identifying whether we use the 'else' in a COND.
(vectorizable_lane_reducing): Simplify check of whether
we are in a reduction.
(vectorizable_reduction): Add sanity checking around
SLP_TREE_REDUC_IDX and use it where it looks appropriate.
(vect_transform_reduction): Use SLP_TREE_REDUC_IDX.
* tree-vect-stmts.cc (vectorizable_call): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_condition): Likewise.
---
gcc/tree-vect-loop.cc | 31 +++++++++++++------------------
gcc/tree-vect-stmts.cc | 8 ++++----
2 files changed, 17 insertions(+), 22 deletions(-)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 4af7283485e..b187d0d8533 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -5497,7 +5497,6 @@ vect_create_epilog_for_reduction (loop_vec_info
loop_vinfo,
while (cond_node != slp_node_instance->reduc_phis)
{
stmt_vec_info cond_info = SLP_TREE_REPRESENTATIVE (cond_node);
- int slp_reduc_idx;
if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
{
gimple *vec_stmt
@@ -5505,16 +5504,9 @@ vect_create_epilog_for_reduction (loop_vec_info
loop_vinfo,
gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
ccompares.safe_push
(std::make_pair (gimple_assign_rhs1 (vec_stmt),
- STMT_VINFO_REDUC_IDX (cond_info) == 2));
- /* ??? We probably want to have REDUC_IDX on the SLP node?
- We have both three and four children COND_EXPR nodes
- dependent on whether the comparison is still embedded
- as GENERIC. So work backwards. */
- slp_reduc_idx = (SLP_TREE_CHILDREN (cond_node).length () - 3
- + STMT_VINFO_REDUC_IDX (cond_info));
+ SLP_TREE_REDUC_IDX (cond_node) == 2));
}
- else
- slp_reduc_idx = STMT_VINFO_REDUC_IDX (cond_info);
+ int slp_reduc_idx = SLP_TREE_REDUC_IDX (cond_node);
cond_node = SLP_TREE_CHILDREN (cond_node)[slp_reduc_idx];
}
gcc_assert (ccompares.length () != 0);
@@ -6882,14 +6874,13 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo,
stmt_vec_info stmt_info,
if (!type_has_mode_precision_p (type))
return false;
+ vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
+
/* TODO: Support lane-reducing operation that does not directly participate
in loop reduction. */
- if (!STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
- || STMT_VINFO_REDUC_IDX (stmt_info) < 0)
+ if (!reduc_info)
return false;
- vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
-
/* Lane-reducing pattern inside any inner loop of LOOP_VINFO is not
recoginized. */
gcc_assert (!nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
stmt_info));
@@ -7135,7 +7126,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
stmt_vec_info vdef = vect_stmt_to_vectorize (def);
int reduc_idx = STMT_VINFO_REDUC_IDX (vdef);
- if (reduc_idx == -1)
+ if (STMT_VINFO_REDUC_IDX (vdef) == -1
+ || SLP_TREE_REDUC_IDX (vdef_slp) == -1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7204,7 +7196,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
else if (!vectype_in)
vectype_in = SLP_TREE_VECTYPE (slp_node);
if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
- vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
+ {
+ gcc_assert (reduc_idx == SLP_TREE_REDUC_IDX (vdef_slp));
+ vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
+ }
}
reduc_def = op.ops[reduc_idx];
@@ -7361,7 +7356,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
return false;
/* When the condition uses the reduction value in the condition, fail.
*/
- if (STMT_VINFO_REDUC_IDX (stmt_info) == 0)
+ if (SLP_TREE_REDUC_IDX (slp_node) == 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -8001,7 +7996,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the
reduction variable. */
- int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
+ int reduc_index = SLP_TREE_REDUC_IDX (slp_node);
tree vectype_in = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0]);
vec_num = vect_get_num_copies (loop_vinfo, slp_node, vectype_in);
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 77a03ed4a7b..15e0d069dcc 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3432,7 +3432,7 @@ vectorizable_call (vec_info *vinfo,
}
}
- int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+ int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
internal_fn cond_fn = get_conditional_internal_fn (ifn);
internal_fn cond_len_fn = get_len_internal_fn (ifn);
int len_opno = internal_fn_len_index (cond_len_fn);
@@ -6452,7 +6452,7 @@ vectorizable_operation (vec_info *vinfo,
using_emulated_vectors_p = true;
}
- int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+ int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
internal_fn cond_fn = get_conditional_internal_fn (code);
@@ -6570,7 +6570,7 @@ vectorizable_operation (vec_info *vinfo,
else if (arith_code_with_undefined_signed_overflow (orig_code)
&& ANY_INTEGRAL_TYPE_P (vectype)
&& TYPE_OVERFLOW_UNDEFINED (vectype)
- && STMT_VINFO_REDUC_IDX (stmt_info) != -1)
+ && SLP_TREE_REDUC_IDX (slp_node) != -1)
{
gcc_assert (orig_code == PLUS_EXPR || orig_code == MINUS_EXPR
|| orig_code == MULT_EXPR || orig_code == POINTER_PLUS_EXPR);
@@ -11560,7 +11560,7 @@ vectorizable_condition (vec_info *vinfo,
if (code != COND_EXPR)
return false;
- int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
+ int reduc_index = SLP_TREE_REDUC_IDX (slp_node);
vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
bool nested_cycle_p = false;
bool for_reduction = vect_is_reduction (stmt_info);
--
2.43.0