Hi,
Currently we discard the cond-op mask when the loop is fully masked,
which causes wrong code in
gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c
when compiled with
-O3 -march=cascadelake --param vect-partial-vector-usage=2.
This patch ANDs both masks instead.
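To illustrate the intended semantics, here is a minimal scalar sketch
(not vectorizer code; the function and parameter names are made up):
an element may only contribute to the in-order reduction when both the
loop mask (partial-vector tail handling) and the cond-op mask are set,
i.e. the effective mask is their AND.

  #include <cstddef>

  /* Scalar model of a fold-left conditional reduction: accumulate a[i]
     only when both the loop mask and the condition mask allow it.
     Using only loop_mask here would also accumulate elements the
     condition excluded, which is what dropping the cond-op mask
     effectively did.  */
  double
  fold_left_plus (const double *a, const bool *loop_mask,
		  const bool *cond_mask, std::size_t n, double init)
  {
    double res = init;
    for (std::size_t i = 0; i < n; ++i)
      if (loop_mask[i] && cond_mask[i])	/* AND of both masks.  */
	res += a[i];
    return res;
  }
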
Bootstrapped and regtested on x86, aarch64 and power10.
Regtested on riscv64 and armv8.8-a+sve via qemu.
Regards
Robin
gcc/ChangeLog:
* tree-vect-loop.cc (vectorize_fold_left_reduction): Merge loop
mask and cond-op mask.
---
gcc/tree-vect-loop.cc | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 028692614bb..f9bf6a45611 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7215,7 +7215,21 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
       tree len = NULL_TREE;
       tree bias = NULL_TREE;
       if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
-	mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i);
+	{
+	  tree mask_loop = vect_get_loop_mask (loop_vinfo, gsi, masks,
+					       vec_num, vectype_in, i);
+	  if (is_cond_op)
+	    {
+	      /* Merge the loop mask and the cond_op mask.  */
+	      mask = make_ssa_name (TREE_TYPE (mask_loop));
+	      gassign *and_stmt = gimple_build_assign (mask, BIT_AND_EXPR,
+						       mask_loop,
+						       vec_opmask[i]);
+	      gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
+	    }
+	  else
+	    mask = mask_loop;
+	}
       else if (is_cond_op)
 	mask = vec_opmask[i];
       if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
--
2.45.1