From: Pan Li <[email protected]>
This patch adds early break auto-vectorization support for targets which
use length for partial vectorization. Consider the following example:
unsigned vect_a[802];
unsigned vect_b[802];
void test (unsigned x, int n)
{
for (int i = 0; i < n; i++)
{
vect_b[i] = x + i;
if (vect_a[i] > x)
break;
vect_a[i] = x;
}
}
We use VCOND_MASK_LEN to simulate the generation of (mask && i < len + bias).
The resulting IR for RVV then looks like below:
...
_87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
_55 = (int) _87;
...
mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
{0, ... }, _87, 0);
if (vec_len_mask_72 != { 0, ... })
goto <bb 6>; [5.50%]
else
goto <bb 7>; [94.50%]
The tests below passed for this patch:
1. The riscv full regression tests.
2. The aarch64 full regression tests.
3. The x86 bootstrap tests.
4. The x86 full regression tests.
gcc/ChangeLog:
* tree-vect-stmts.cc (vectorizable_early_exit): Add loop len
handling for one or multiple stmts.
Signed-off-by: Pan Li <[email protected]>
---
gcc/tree-vect-stmts.cc | 47 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 45 insertions(+), 2 deletions(-)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 21e8fe98e44..bfd9d66568f 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12896,7 +12896,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info
stmt_info,
ncopies = vect_get_num_copies (loop_vinfo, vectype);
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+ bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
/* Now build the new conditional. Pattern gimple_conds get dropped during
codegen so we must replace the original insn. */
@@ -12960,12 +12962,11 @@ vectorizable_early_exit (vec_info *vinfo,
stmt_vec_info stmt_info,
{
if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
OPTIMIZE_FOR_SPEED))
- return false;
+ vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
else
vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
}
-
return true;
}
@@ -13018,6 +13019,25 @@ vectorizable_early_exit (vec_info *vinfo,
stmt_vec_info stmt_info,
stmts[i], &cond_gsi);
workset.quick_push (stmt_mask);
}
+ else if (len_loop_p)
+ for (unsigned i = 0; i < stmts.length (); i++)
+ {
+ tree all_ones_mask = build_all_ones_cst (vectype);
+ tree all_zero_mask = build_zero_cst (vectype);
+ tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
+ vectype, i, 1);
+ signed char cst = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ tree bias = build_int_cst (intQI_type_node, cst);
+ tree len_mask
+ = make_temp_ssa_name (TREE_TYPE (stmts[i]), NULL, "vec_len_mask");
+ gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5,
+ stmts[i], all_ones_mask,
+ all_zero_mask, len, bias);
+ gimple_call_set_lhs (call, len_mask);
+ gsi_insert_before (&cond_gsi, call, GSI_SAME_STMT);
+
+ workset.quick_push (len_mask);
+ }
else
workset.splice (stmts);
@@ -13042,6 +13062,29 @@ vectorizable_early_exit (vec_info *vinfo,
stmt_vec_info stmt_info,
new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
new_temp, &cond_gsi);
}
+ else if (len_loop_p)
+ {
+ /* len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)
+
+ which is equivalent to:
+
+ len_mask = compare_mask mask && i < len ? 1 : 0
+ */
+ tree all_ones_mask = build_all_ones_cst (vectype);
+ tree all_zero_mask = build_zero_cst (vectype);
+ tree len
+ = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies, vectype, 0, 1);
+ signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ tree bias = build_int_cst (intQI_type_node, biasval);
+ tree len_mask
+ = make_temp_ssa_name (TREE_TYPE (new_temp), NULL, "vec_len_mask");
+ gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5,
+ new_temp, all_ones_mask,
+ all_zero_mask, len, bias);
+ gimple_call_set_lhs (call, len_mask);
+ gsi_insert_before (&cond_gsi, call, GSI_SAME_STMT);
+ new_temp = len_mask;
+ }
}
gcc_assert (new_temp);
--
2.34.1