On Wed, 13 Dec 2023, Tamar Christina wrote:
> > > > else if (vect_use_mask_type_p (stmt_info))
> > > > {
> > > > unsigned int precision = stmt_info->mask_precision;
> > > > scalar_type = build_nonstandard_integer_type (precision, 1);
> > > > vectype = get_mask_type_for_scalar_type (vinfo, scalar_type,
> > > > group_size);
> > > > if (!vectype)
> > > > return opt_result::failure_at (stmt, "not vectorized:
> > > > unsupported"
> > > > " data-type %T\n", scalar_type);
> > > >
> > > > Richard, do you have any advice here? I suppose
> > > > vect_determine_precisions
> > > > needs to handle the gcond case with bool != 0 somehow and for the
> > > > extra mask producer we add here we have to emulate what it would have
> > > > done, right?
> > >
> > > How about handling gconds directly in vect_determine_mask_precision?
> > > In a sense it's not needed, since gconds are always roots, and so we
> > > could calculate their precision on the fly instead. But handling it in
> > > vect_determine_mask_precision feels like it should reduce the number
> > > of special cases.
> >
> > Yeah, that sounds worth trying.
> >
> > Richard.
>
> So here's a respin with this suggestion and the other issues fixed.
> Note that the testcases still need to be updated with the right stanzas.
>
> The patch is much smaller, I still have a small change to
> vect_get_vector_types_for_stmt in case we get there on a gcond where
> vect_recog_gcond_pattern couldn't apply due to the target missing an
> appropriate vectype. The change only gracefully rejects the gcond.
>
> Since patterns cannot apply to the same root twice I've had to also do
> the split of the condition out of the gcond in bitfield lowering.
Bah. Guess we want to fix that (next stage1). Can you please add
a comment to the split out done in vect_recog_bitfield_ref_pattern?
> Bootstrapped and regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu with no
> issues.
>
> Ok for master?
OK with the above change.
Thanks,
Richard.
> Thanks,
> Tamar
> gcc/ChangeLog:
>
> * tree-vect-patterns.cc (vect_init_pattern_stmt): Support gcond.
> (vect_recog_bitfield_ref_pattern): Update to split out bool.
> (vect_recog_gcond_pattern): New.
> (possible_vector_mask_operation_p): Support gcond.
> (vect_determine_mask_precision): Likewise.
> * tree-vect-stmts.cc (vectorizable_comparison_1): Support stmts without
> lhs.
> (vectorizable_early_exit): New.
> (vect_analyze_stmt, vect_transform_stmt): Use it.
> (vect_get_vector_types_for_stmt): Reject gcond if not lowered by
> vect_recog_gcond_pattern.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/vect-early-break_84.c: New test.
> * gcc.dg/vect/vect-early-break_85.c: New test.
> * gcc.dg/vect/vect-early-break_86.c: New test.
> * gcc.dg/vect/vect-early-break_87.c: New test.
> * gcc.dg/vect/vect-early-break_88.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_84.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_84.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..0622339491d333b07c2ce895785b5216713097a9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_84.c
> @@ -0,0 +1,39 @@
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +#include <stdbool.h>
> +
> +#ifndef N
> +#define N 17
> +#endif
> +bool vect_a[N] = { false, false, true, false, false, false,
> + false, false, false, false, false, false,
> + false, false, false, false, false };
> +unsigned vect_b[N] = { 0 };
> +
> +__attribute__ ((noinline, noipa))
> +unsigned test4(bool x)
> +{
> + unsigned ret = 0;
> + for (int i = 0; i < N; i++)
> + {
> + if (vect_a[i] == x)
> + return 1;
> + vect_a[i] = x;
> +
> + }
> + return ret;
> +}
> +
> +extern void abort ();
> +
> +int main ()
> +{
> + if (test4 (true) != 1)
> + abort ();
> +
> + if (vect_b[2] != 0 && vect_b[1] == 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_85.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_85.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..39b3d9bad8681a2d15d7fc7de86bdd3ce0f0bd4e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_85.c
> @@ -0,0 +1,35 @@
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +#ifndef N
> +#define N 5
> +#endif
> +int vect_a[N] = { 5, 4, 8, 4, 6 };
> +unsigned vect_b[N] = { 0 };
> +
> +__attribute__ ((noinline, noipa))
> +unsigned test4(int x)
> +{
> + unsigned ret = 0;
> + for (int i = 0; i < N; i++)
> + {
> + if (vect_a[i] > x)
> + return 1;
> + vect_a[i] = x;
> +
> + }
> + return ret;
> +}
> +
> +extern void abort ();
> +
> +int main ()
> +{
> + if (test4 (7) != 1)
> + abort ();
> +
> + if (vect_b[2] != 0 && vect_b[1] == 0)
> + abort ();
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_86.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_86.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..66eb570f4028bca4b631329d7af50c646d3c0cb3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_86.c
> @@ -0,0 +1,21 @@
> +/* { dg-additional-options "-std=gnu89" } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +extern void abort ();
> +extern void exit (int);
> +
> +__attribute__((noinline, noipa))
> +int f(x) {
> + int i;
> + for (i = 0; i < 8 && (x & 1) == 1; i++)
> + x >>= 1;
> + return i;
> +}
> +main() {
> + if (f(4) != 0)
> + abort();
> + exit(0);
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_87.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_87.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..67be67da0583ba7feda3bed09c42fa735da9b98e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_87.c
> @@ -0,0 +1,21 @@
> +/* { dg-additional-options "-std=gnu89" } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +extern void abort ();
> +extern void exit (int);
> +
> +__attribute__((noinline, noipa))
> +int f(x) {
> + int i;
> + for (i = 0; i < 8 && (x & 1) == 0; i++)
> + x >>= 1;
> + return i;
> +}
> +main() {
> + if (f(4) != 2)
> + abort();
> + exit(0);
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_88.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_88.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..b64becd588973f58601196bfcb15afbe4bab60f2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_88.c
> @@ -0,0 +1,36 @@
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-additional-options "-Ofast --param vect-partial-vector-usage=2" } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +#ifndef N
> +#define N 5
> +#endif
> +float vect_a[N] = { 5.1f, 4.2f, 8.0f, 4.25f, 6.5f };
> +unsigned vect_b[N] = { 0 };
> +
> +__attribute__ ((noinline, noipa))
> +unsigned test4(double x)
> +{
> + unsigned ret = 0;
> + for (int i = 0; i < N; i++)
> + {
> + if (vect_a[i] > x)
> + break;
> + vect_a[i] = x;
> +
> + }
> + return ret;
> +}
> +
> +extern void abort ();
> +
> +int main ()
> +{
> + if (test4 (7.0) != 0)
> + abort ();
> +
> + if (vect_b[2] != 0 && vect_b[1] == 0)
> + abort ();
> +}
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index
> 7debe7f0731673cd1bf25cd39d55e23990a73d0e..f6ce27a7c45aa6ce72c402987958ee395c045a14
> 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -132,6 +132,7 @@ vect_init_pattern_stmt (vec_info *vinfo, gimple
> *pattern_stmt,
> if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
> {
> gcc_assert (!vectype
> + || is_a <gcond *> (pattern_stmt)
> || (VECTOR_BOOLEAN_TYPE_P (vectype)
> == vect_use_mask_type_p (orig_stmt_info)));
> STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
> @@ -2786,15 +2787,24 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo,
> stmt_vec_info stmt_info,
>
> if (!lhs)
> {
> + if (!vectype)
> + return NULL;
> +
> append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
> + vectype = truth_type_for (vectype);
> +
> + tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
> gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
> tree cond_cst = gimple_cond_rhs (cond_stmt);
> + gimple *new_stmt
> + = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
> + gimple_get_lhs (pattern_stmt),
> + fold_convert (container_type, cond_cst));
> + append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype,
> container_type);
> pattern_stmt
> - = gimple_build_cond (gimple_cond_code (cond_stmt),
> - gimple_get_lhs (pattern_stmt),
> - fold_convert (ret_type, cond_cst),
> - gimple_cond_true_label (cond_stmt),
> - gimple_cond_false_label (cond_stmt));
> + = gimple_build_cond (NE_EXPR, new_lhs,
> + build_zero_cst (TREE_TYPE (new_lhs)),
> + NULL_TREE, NULL_TREE);
> }
>
> *type_out = STMT_VINFO_VECTYPE (stmt_info);
> @@ -5553,6 +5563,72 @@ integer_type_for_mask (tree var, vec_info *vinfo)
> return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
> }
>
> +/* Function vect_recog_gcond_pattern
> +
> + Try to find pattern like following:
> +
> + if (a op b)
> +
> + where operator 'op' is not != and convert it to an adjusted boolean
> pattern
> +
> + mask = a op b
> + if (mask != 0)
> +
> + and set the mask type on MASK.
> +
> + Input:
> +
> + * STMT_VINFO: The stmt at the end from which the pattern
> + search begins, i.e. the gcond whose
> + condition is to be split out.
> +
> + Output:
> +
> + * TYPE_OUT: The type of the output of this pattern.
> +
> + * Return value: A new stmt that will be used to replace the pattern. */
> +
> +static gimple *
> +vect_recog_gcond_pattern (vec_info *vinfo,
> + stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> + gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
> + gcond* cond = NULL;
> + if (!(cond = dyn_cast <gcond *> (last_stmt)))
> + return NULL;
> +
> + auto lhs = gimple_cond_lhs (cond);
> + auto rhs = gimple_cond_rhs (cond);
> + auto code = gimple_cond_code (cond);
> +
> + tree scalar_type = TREE_TYPE (lhs);
> + if (VECTOR_TYPE_P (scalar_type))
> + return NULL;
> +
> + if (code == NE_EXPR
> + && zerop (rhs)
> + && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
> + return NULL;
> +
> + tree vecitype = get_vectype_for_scalar_type (vinfo, scalar_type);
> + if (vecitype == NULL_TREE)
> + return NULL;
> +
> + tree vectype = truth_type_for (vecitype);
> +
> + tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
> + gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
> + append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
> +
> + gimple *pattern_stmt
> + = gimple_build_cond (NE_EXPR, new_lhs,
> + build_int_cst (TREE_TYPE (new_lhs), 0),
> + NULL_TREE, NULL_TREE);
> + *type_out = vectype;
> + vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
> + return pattern_stmt;
> +}
> +
> /* Function vect_recog_bool_pattern
>
> Try to find pattern like following:
> @@ -6581,15 +6657,26 @@ static bool
> possible_vector_mask_operation_p (stmt_vec_info stmt_info)
> {
> tree lhs = gimple_get_lhs (stmt_info->stmt);
> + tree_code code = ERROR_MARK;
> + gassign *assign = NULL;
> + gcond *cond = NULL;
> +
> + if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
> + code = gimple_assign_rhs_code (assign);
> + else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
> + {
> + lhs = gimple_cond_lhs (cond);
> + code = gimple_cond_code (cond);
> + }
> +
> if (!lhs
> || TREE_CODE (lhs) != SSA_NAME
> || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
> return false;
>
> - if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
> + if (code != ERROR_MARK)
> {
> - tree_code rhs_code = gimple_assign_rhs_code (assign);
> - switch (rhs_code)
> + switch (code)
> {
> CASE_CONVERT:
> case SSA_NAME:
> @@ -6600,7 +6687,7 @@ possible_vector_mask_operation_p (stmt_vec_info
> stmt_info)
> return true;
>
> default:
> - return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
> + return TREE_CODE_CLASS (code) == tcc_comparison;
> }
> }
> else if (is_a <gphi *> (stmt_info->stmt))
> @@ -6647,12 +6734,35 @@ vect_determine_mask_precision (vec_info *vinfo,
> stmt_vec_info stmt_info)
> The number of operations are equal, but M16 would have given
> a shorter dependency chain and allowed more ILP. */
> unsigned int precision = ~0U;
> - if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
> + gimple *stmt = STMT_VINFO_STMT (stmt_info);
> +
> + /* If the statement compares two values that shouldn't use vector masks,
> + try comparing the values as normal scalars instead. */
> + tree_code code = ERROR_MARK;
> + tree op0_type;
> + unsigned int nops = -1;
> + unsigned int ops_start = 0;
> +
> + if (gassign *assign = dyn_cast <gassign *> (stmt))
> + {
> + code = gimple_assign_rhs_code (assign);
> + op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
> + nops = gimple_num_ops (assign);
> + ops_start = 1;
> + }
> + else if (gcond *cond = dyn_cast <gcond *> (stmt))
> + {
> + code = gimple_cond_code (cond);
> + op0_type = TREE_TYPE (gimple_cond_lhs (cond));
> + nops = 2;
> + ops_start = 0;
> + }
> +
> + if (code != ERROR_MARK)
> {
> - unsigned int nops = gimple_num_ops (assign);
> - for (unsigned int i = 1; i < nops; ++i)
> + for (unsigned int i = ops_start; i < nops; ++i)
> {
> - tree rhs = gimple_op (assign, i);
> + tree rhs = gimple_op (stmt, i);
> if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
> continue;
>
> @@ -6669,19 +6779,15 @@ vect_determine_mask_precision (vec_info *vinfo,
> stmt_vec_info stmt_info)
> }
> }
>
> - /* If the statement compares two values that shouldn't use vector
> masks,
> - try comparing the values as normal scalars instead. */
> - tree_code rhs_code = gimple_assign_rhs_code (assign);
> if (precision == ~0U
> - && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
> + && TREE_CODE_CLASS (code) == tcc_comparison)
> {
> - tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
> scalar_mode mode;
> tree vectype, mask_type;
> - if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
> - && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
> - && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
> - && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
> + if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
> + && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
> + && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
> + && expand_vec_cmp_expr_p (vectype, mask_type, code))
> precision = GET_MODE_BITSIZE (mode);
> }
> }
> @@ -6860,6 +6966,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
> { vect_recog_divmod_pattern, "divmod" },
> { vect_recog_mult_pattern, "mult" },
> { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
> + { vect_recog_gcond_pattern, "gcond" },
> { vect_recog_bool_pattern, "bool" },
> /* This must come before mask conversion, and includes the parts
> of mask conversion that are needed for gather and scatter
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index
> 582c5e678fad802d6e76300fe3c939b9f2978f17..766450cd85b55ce4dfd45878c5dc44cd09c68681
> 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -12489,7 +12489,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree
> vectype,
> vec<tree> vec_oprnds0 = vNULL;
> vec<tree> vec_oprnds1 = vNULL;
> tree mask_type;
> - tree mask;
> + tree mask = NULL_TREE;
>
> if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
> return false;
> @@ -12629,8 +12629,9 @@ vectorizable_comparison_1 (vec_info *vinfo, tree
> vectype,
> /* Transform. */
>
> /* Handle def. */
> - lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
> - mask = vect_create_destination_var (lhs, mask_type);
> + lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));
> + if (lhs)
> + mask = vect_create_destination_var (lhs, mask_type);
>
> vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
> rhs1, &vec_oprnds0, vectype,
> @@ -12644,7 +12645,10 @@ vectorizable_comparison_1 (vec_info *vinfo, tree
> vectype,
> gimple *new_stmt;
> vec_rhs2 = vec_oprnds1[i];
>
> - new_temp = make_ssa_name (mask);
> + if (lhs)
> + new_temp = make_ssa_name (mask);
> + else
> + new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
> if (bitop1 == NOP_EXPR)
> {
> new_stmt = gimple_build_assign (new_temp, code,
> @@ -12723,6 +12727,207 @@ vectorizable_comparison (vec_info *vinfo,
> return true;
> }
>
> +/* Check to see if the current early break given in STMT_INFO is valid for
> + vectorization. */
> +
> +static bool
> +vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
> + gimple_stmt_iterator *gsi, gimple **vec_stmt,
> + slp_tree slp_node, stmt_vector_for_cost *cost_vec)
> +{
> + loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> + if (!loop_vinfo
> + || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
> + return false;
> +
> + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
> + return false;
> +
> + if (!STMT_VINFO_RELEVANT_P (stmt_info))
> + return false;
> +
> + DUMP_VECT_SCOPE ("vectorizable_early_exit");
> +
> + auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info));
> +
> + tree vectype = NULL_TREE;
> + slp_tree slp_op0;
> + tree op0;
> + enum vect_def_type dt0;
> + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0,
> &dt0,
> + &vectype))
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "use not simple.\n");
> + return false;
> + }
> +
> + if (!vectype)
> + return false;
> +
> + machine_mode mode = TYPE_MODE (vectype);
> + int ncopies;
> +
> + if (slp_node)
> + ncopies = 1;
> + else
> + ncopies = vect_get_num_copies (loop_vinfo, vectype);
> +
> + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
> + bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
> +
> + /* Analyze only. */
> + if (!vec_stmt)
> + {
> + if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "can't vectorize early exit because the "
> + "target doesn't support flag setting vector "
> + "comparisons.\n");
> + return false;
> + }
> +
> + if (ncopies > 1
> + && direct_optab_handler (ior_optab, mode) == CODE_FOR_nothing)
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "can't vectorize early exit because the "
> + "target does not support boolean vector OR for "
> + "type %T.\n", vectype);
> + return false;
> + }
> +
> + if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
> + vec_stmt, slp_node, cost_vec))
> + return false;
> +
> + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> + {
> + if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
> + OPTIMIZE_FOR_SPEED))
> + return false;
> + else
> + vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
> + }
> +
> +
> + return true;
> + }
> +
> + /* Transform. */
> +
> + tree new_temp = NULL_TREE;
> + gimple *new_stmt = NULL;
> +
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
> +
> + if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
> + vec_stmt, slp_node, cost_vec))
> + gcc_unreachable ();
> +
> + gimple *stmt = STMT_VINFO_STMT (stmt_info);
> + basic_block cond_bb = gimple_bb (stmt);
> + gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
> +
> + auto_vec<tree> stmts;
> +
> + if (slp_node)
> + stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node));
> + else
> + {
> + auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info);
> + stmts.reserve_exact (vec_stmts.length ());
> + for (auto stmt : vec_stmts)
> + stmts.quick_push (gimple_assign_lhs (stmt));
> + }
> +
> + /* Determine if we need to reduce the final value. */
> + if (stmts.length () > 1)
> + {
> + /* We build the reductions in a way to maintain as much parallelism as
> + possible. */
> + auto_vec<tree> workset (stmts.length ());
> +
> + /* Mask the statements as we queue them up. Normally we loop over
> + vec_num, but since we inspect the exact results of vectorization
> + we don't need to and instead can just use the stmts themselves. */
> + if (masked_loop_p)
> + for (unsigned i = 0; i < stmts.length (); i++)
> + {
> + tree stmt_mask
> + = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype,
> + i);
> + stmt_mask
> + = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask,
> + stmts[i], &cond_gsi);
> + workset.quick_push (stmt_mask);
> + }
> + else
> + workset.splice (stmts);
> +
> + while (workset.length () > 1)
> + {
> + new_temp = make_temp_ssa_name (vectype, NULL, "vexit_reduc");
> + tree arg0 = workset.pop ();
> + tree arg1 = workset.pop ();
> + new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, arg1);
> + vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
> + &cond_gsi);
> + workset.quick_insert (0, new_temp);
> + }
> + }
> + else
> + {
> + new_temp = stmts[0];
> + if (masked_loop_p)
> + {
> + tree mask
> + = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, 0);
> + new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
> + new_temp, &cond_gsi);
> + }
> + }
> +
> + gcc_assert (new_temp);
> +
> + /* Now build the new conditional. Pattern gimple_conds get dropped during
> + codegen so we must replace the original insn. */
> + stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
> + gcond *cond_stmt = as_a <gcond *>(stmt);
> + /* When vectorizing we assume that if the branch edge is taken that we're
> + exiting the loop. This is not however always the case as the compiler
> will
> + rewrite conditions to always be a comparison against 0. To do this it
> + sometimes flips the edges. This is fine for scalar, but for vector we
> + then have to flip the test, as we're still assuming that if you take the
> + branch edge that we found the exit condition. */
> + auto new_code = NE_EXPR;
> + tree cst = build_zero_cst (vectype);
> + if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
> + BRANCH_EDGE (gimple_bb (cond_stmt))->dest))
> + {
> + new_code = EQ_EXPR;
> + cst = build_minus_one_cst (vectype);
> + }
> +
> + gimple_cond_set_condition (cond_stmt, new_code, new_temp, cst);
> + update_stmt (stmt);
> +
> + if (slp_node)
> + SLP_TREE_VEC_DEFS (slp_node).truncate (0);
> + else
> + STMT_VINFO_VEC_STMTS (stmt_info).truncate (0);
> +
> + if (!slp_node)
> + *vec_stmt = stmt;
> +
> + return true;
> +}
> +
> /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
> can handle all live statements in the node. Otherwise return true
> if STMT_INFO is not live or if vectorizable_live_operation can handle it.
> @@ -12949,7 +13154,9 @@ vect_analyze_stmt (vec_info *vinfo,
> || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
> stmt_info, NULL, node)
> || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
> - stmt_info, NULL, node, cost_vec));
> + stmt_info, NULL, node, cost_vec)
> + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
> + cost_vec));
> else
> {
> if (bb_vinfo)
> @@ -12972,7 +13179,10 @@ vect_analyze_stmt (vec_info *vinfo,
> NULL, NULL, node, cost_vec)
> || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
> cost_vec)
> - || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
> + || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)
> + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
> + cost_vec));
> +
> }
>
> if (node)
> @@ -13131,6 +13341,12 @@ vect_transform_stmt (vec_info *vinfo,
> gcc_assert (done);
> break;
>
> + case loop_exit_ctrl_vec_info_type:
> + done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
> + slp_node, NULL);
> + gcc_assert (done);
> + break;
> +
> default:
> if (!STMT_VINFO_LIVE_P (stmt_info))
> {
> @@ -14321,6 +14537,14 @@ vect_get_vector_types_for_stmt (vec_info *vinfo,
> stmt_vec_info stmt_info,
> }
> else
> {
> + /* If we got here with a gcond it means that the target had no
> available vector
> + mode for the scalar type. We can't vectorize so abort. */
> + if (is_a <gcond *> (stmt))
> + return opt_result::failure_at (stmt,
> + "not vectorized:"
> + " unsupported data-type for gcond %T\n",
> + scalar_type);
> +
> if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
> scalar_type = TREE_TYPE (DR_REF (dr));
> else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)