Hi,

we currently discard the cond-op mask when the loop is fully masked,
which causes wrong code in
gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c when compiled with
-O3 -march=cascadelake --param vect-partial-vector-usage=2.
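For illustration, the affected pattern is a conditional in-order
(fold-left) reduction along the lines of the sketch below.  This is a
minimal example, not the actual test case, and the function name is
made up:

/* Minimal sketch of a conditional fold-left reduction, not the actual
   test case.  In a fully masked vector loop, lanes disabled by the
   loop mask must not contribute to RES, so the accumulation has to be
   gated by the loop mask as well as the condition mask.  */
double
cond_fold_left (double *restrict a, int *restrict c, int n)
{
  double res = 0.0;
  for (int i = 0; i < n; i++)
    if (c[i])
      res += a[i];
  return res;
}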
This patch ANDs both masks instead.

Bootstrapped and regtested on x86, aarch64 and power10.
Regtested on riscv64 and armv8.8-a+sve via qemu.

Regards
 Robin

gcc/ChangeLog:

	* tree-vect-loop.cc (vectorize_fold_left_reduction): Merge loop
	mask and cond-op mask.
---
 gcc/tree-vect-loop.cc | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 028692614bb..f9bf6a45611 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7215,7 +7215,21 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
       tree len = NULL_TREE;
       tree bias = NULL_TREE;
       if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
-        mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i);
+        {
+          tree mask_loop = vect_get_loop_mask (loop_vinfo, gsi, masks,
+                                               vec_num, vectype_in, i);
+          if (is_cond_op)
+            {
+              /* Merge the loop mask and the cond_op mask.  */
+              mask = make_ssa_name (TREE_TYPE (mask_loop));
+              gassign *and_stmt = gimple_build_assign (mask, BIT_AND_EXPR,
+                                                       mask_loop,
+                                                       vec_opmask[i]);
+              gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
+            }
+          else
+            mask = mask_loop;
+        }
       else if (is_cond_op)
         mask = vec_opmask[i];
       if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
-- 
2.45.1