On Fri, 5 Jul 2024, Robin Dapp wrote: > Hi, > > in PR115336 we have the following > > vect_patt_391 = .MASK_LEN_GATHER_LOAD (_470, vect__59, 1, { 0, ... }, { 0, > ... }, _482, 0); > vect_iftmp.44 = vect_patt_391 | { 1, ... }; > .MASK_LEN_STORE (vectp_f.45, 8B, { -1, ... }, _482, 0, vect_iftmp.44); > > which assumes that a maskload sets the masked-out elements to zero.
To me this looks like mis-applying of match.pd:6083? Applying pattern match.pd:6083, gimple-match-1.cc:45749 gimple_simplified to iftmp.0_62 = iftmp.0_61 | _219; new phi replacement stmt iftmp.0_62 = iftmp.0_61 | _219; so originally it wasn't iftmp.0_61 = .MASK_LOAD (_260, 8B, _259); iftmp.0_62 = iftmp.0_61 | _219; but sth like iftmp.0_62 = <mask> ? _219 : iftmp.0_61; and iftmp.0_61 is zero-one-valued because it is _Bool. Shouldn't if-conversion always turn the COND_EXPR into a .COND_IOR here? > RVV does not guarantee this and, unfortunately, leaves it to the > hardware implementation to do basically anything it wants (even keep > the previous value). In the PR this leads to us reusing a previous > vector register and stale, nonzero values causing an invalid result. > > Based on a Richard Sandiford's feedback I started with the attached > patch - it's more an RFC in its current shape and obviously not > tested exhaustively. > > The idea is: > - In ifcvt emit a VCOND_MASK (mask, load_result, preferred_else_value) > after a MASK_LOAD if the target requires it. > - Elide the VCOND_MASK when there is a COND_OP with a replacing else > value later. > > Is this, generally, reasonable or is there a better way? > > My initial idea was to add an else value to MASK_LOAD. Richard's > concern was, though, that this might make nonzero else values > appear inexpensive when they are actually not. > > Even though I, mechanically, added match.pd patterns to catch > the most common cases (already at a point where a separate function > maybe in gimple-match-exports? would make more sense), there is > still significant code-quality fallout. > The regressing cases are often of a form where the VCOND_MASK is > not just a conversion away but rather hidden behind some unmasked > operation. I'm not sure if we could ever recognize everything that > way without descending very deep. > > Regards > Robin > > gcc/ChangeLog: > > PR middle-end/115336 > > * config/riscv/riscv.cc (riscv_preferred_else_value): Add > MASK_LOAD. > * config/riscv/riscv.h (TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO): > Set to true. > * defaults.h (TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO): New. > * doc/tm.texi: Document. > * doc/tm.texi.in: Document. > * match.pd: Add patterns to allow replacing the else value of a > VEC_COND. > * tree-if-conv.cc (predicate_load_or_store): Emit a VEC_COND > after a MASK_LOAD if the target does not guarantee zeroing. > (predicate_statements): Add temporary lhs argument. > * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Re-fold fold > result. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/pr115336.c: New test. > --- > gcc/config/riscv/riscv.cc | 2 + > gcc/config/riscv/riscv.h | 2 + > gcc/defaults.h | 4 + > gcc/doc/tm.texi | 5 + > gcc/doc/tm.texi.in | 5 + > gcc/match.pd | 125 +++++++++++++++++- > .../gcc.target/riscv/rvv/autovec/pr115336.c | 20 +++ > gcc/tree-if-conv.cc | 57 +++++++- > gcc/tree-ssa-math-opts.cc | 8 ++ > 9 files changed, 217 insertions(+), 11 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c > > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index c17141d909a..e10bc6824b9 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -11481,6 +11481,8 @@ riscv_preferred_else_value (unsigned ifn, tree > vectype, unsigned int nops, > { > if (riscv_v_ext_mode_p (TYPE_MODE (vectype))) > return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype)); > + else if (ifn == IFN_MASK_LOAD) > + return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype)); > > return default_preferred_else_value (ifn, vectype, nops, ops); > } > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h > index 57910eecd3e..dc15eb5e60f 100644 > --- a/gcc/config/riscv/riscv.h > +++ b/gcc/config/riscv/riscv.h > @@ -1264,4 +1264,6 @@ extern void riscv_remove_unneeded_save_restore_calls > (void); > /* Check TLS Descriptors mechanism is selected. */ > #define TARGET_TLSDESC (riscv_tls_dialect == TLS_DESCRIPTORS) > > +#define TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO 1 > + > #endif /* ! GCC_RISCV_H */ > diff --git a/gcc/defaults.h b/gcc/defaults.h > index 92f3e07f742..6ffbdaea229 100644 > --- a/gcc/defaults.h > +++ b/gcc/defaults.h > @@ -1457,6 +1457,10 @@ see the files COPYING3 and COPYING.RUNTIME > respectively. If not, see > #define DWARF_VERSION_DEFAULT 5 > #endif > > +#ifndef TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO > +#define TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO 0 > +#endif > + > #ifndef USED_FOR_TARGET > /* Done this way to keep gengtype happy. */ > #if BITS_PER_UNIT == 8 > diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi > index be5543b72f8..2fdebe7fd21 100644 > --- a/gcc/doc/tm.texi > +++ b/gcc/doc/tm.texi > @@ -12683,6 +12683,11 @@ maintainer is familiar with. > > @end defmac > > +@defmac TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO > +Bla > + > +@end defmac > + > @deftypefn {Target Hook} bool TARGET_HAVE_SPECULATION_SAFE_VALUE (bool > @var{active}) > This hook is used to determine the level of target support for > @code{__builtin_speculation_safe_value}. If called with an argument > diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in > index 87a7f895174..276c38325dc 100644 > --- a/gcc/doc/tm.texi.in > +++ b/gcc/doc/tm.texi.in > @@ -8097,6 +8097,11 @@ maintainer is familiar with. > > @end defmac > > +@defmac TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO > +Bla > + > +@end defmac > + > @hook TARGET_HAVE_SPECULATION_SAFE_VALUE > > @hook TARGET_SPECULATION_SAFE_VALUE > diff --git a/gcc/match.pd b/gcc/match.pd > index 3d0689c9312..2fdc14ef56f 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -9744,7 +9744,22 @@ and, > #endif > > /* Detect cases in which a VEC_COND_EXPR effectively replaces the > - "else" value of an IFN_COND_*. */ > + "else" value of an IFN_COND_* as well as when the IFN_COND_* > + replaces the else of the VEC_COND_EXPR. */ > +(for cond_op (COND_UNARY) > + (simplify > + (cond_op @0 (view_convert? (vec_cond @0 @1 @2)) @3) > + (with { tree op_type = TREE_TYPE (@3); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 (view_convert:op_type @1) @3))))) > + > +(for cond_len_op (COND_LEN_UNARY) > + (simplify > + (cond_len_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5) > + (with { tree op_type = TREE_TYPE (@3); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 (view_convert:op_type @1) @3 @4 @5))))) > + > (for cond_op (COND_BINARY) > (simplify > (vec_cond @0 (view_convert? (cond_op @0 @1 @2 @3)) @4) > @@ -9756,7 +9771,27 @@ and, > (with { tree op_type = TREE_TYPE (@5); } > (if (inverse_conditions_p (@0, @2) > && element_precision (type) == element_precision (op_type)) > - (view_convert (cond_op @2 @3 @4 (view_convert:op_type @1))))))) > + (view_convert (cond_op @2 @3 @4 (view_convert:op_type @1)))))) > + (simplify > + (cond_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4) > + (with { tree op_type = TREE_TYPE (@3); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 (view_convert:op_type @1) @3 @4)))) > + (simplify > + (cond_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 @1 (view_convert:op_type @2) @4)))) > + (simplify > + (cond_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4) > + (with { tree op_type = TREE_TYPE (@3); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 (convert:op_type @1) @3 @4)))) > + (simplify > + (cond_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 @1 (convert:op_type @2) @4))))) > > /* Same for ternary operations. */ > (for cond_op (COND_TERNARY) > @@ -9770,7 +9805,37 @@ and, > (with { tree op_type = TREE_TYPE (@6); } > (if (inverse_conditions_p (@0, @2) > && element_precision (type) == element_precision (op_type)) > - (view_convert (cond_op @2 @3 @4 @5 (view_convert:op_type @1))))))) > + (view_convert (cond_op @2 @3 @4 @5 (view_convert:op_type @1)))))) > + (simplify > + (cond_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5) > + (with { tree op_type = TREE_TYPE (@3); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 (view_convert:op_type @1) @3 @4 @5)))) > + (simplify > + (cond_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4 @5) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 @1 (view_convert:op_type @2) @4 @5)))) > + (simplify > + (cond_op @0 @1 @2 (view_convert? (vec_cond @0 @3 @4)) @5) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 @1 @2 (view_convert:op_type @3) @5)))) > + (simplify > + (cond_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4 @5) > + (with { tree op_type = TREE_TYPE (@3); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 (convert:op_type @1) @3 @4 @5)))) > + (simplify > + (cond_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4 @5) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 @1 (convert:op_type @2) @4 @5)))) > + (simplify > + (cond_op @0 @1 @2 (convert? (vec_cond @0 @3 @4)) @5) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_op @0 @1 @2 (convert:op_type @3) @5))))) > > /* Detect cases in which a VEC_COND_EXPR effectively replaces the > "else" value of an IFN_COND_LEN_*. */ > @@ -9785,7 +9850,27 @@ and, > (with { tree op_type = TREE_TYPE (@5); } > (if (inverse_conditions_p (@0, @2) > && element_precision (type) == element_precision (op_type)) > - (view_convert (cond_len_op @2 @3 @4 (view_convert:op_type @1) @6 @7)))))) > + (view_convert (cond_len_op @2 @3 @4 (view_convert:op_type @1) @6 @7))))) > + (simplify > + (cond_len_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6) > + (with { tree op_type = TREE_TYPE (@3); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 (view_convert:op_type @1) @3 @4 @5 @6)))) > + (simplify > + (cond_len_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4 @5 @6) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 @1 (view_convert:op_type @2) @4 @5 @6)))) > + (simplify > + (cond_len_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6) > + (with { tree op_type = TREE_TYPE (@3); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 (convert:op_type @1) @3 @4 @5 @6)))) > + (simplify > + (cond_len_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4 @5 @6) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 @1 (convert:op_type @2) @4 @5 @6))))) > > /* Same for ternary operations. */ > (for cond_len_op (COND_LEN_TERNARY) > @@ -9799,7 +9884,37 @@ and, > (with { tree op_type = TREE_TYPE (@6); } > (if (inverse_conditions_p (@0, @2) > && element_precision (type) == element_precision (op_type)) > - (view_convert (cond_len_op @2 @3 @4 @5 (view_convert:op_type @1) @7 > @8)))))) > + (view_convert (cond_len_op @2 @3 @4 @5 (view_convert:op_type @1) @7 > @8))))) > + (simplify > + (cond_len_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6 @7) > + (with { tree op_type = TREE_TYPE (@3); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 (view_convert:op_type @1) @3 @4 @5 @6 @7)))) > + (simplify > + (cond_len_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4 @5 @6 @7) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 @1 (view_convert:op_type @2) @4 @5 @6 @7)))) > + (simplify > + (cond_len_op @0 @1 @2 (view_convert? (vec_cond @0 @3 @4)) @5 @6 @7) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 @1 @2 (view_convert:op_type @3) @5 @6 @7)))) > + (simplify > + (cond_len_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6 @7) > + (with { tree op_type = TREE_TYPE (@3); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 (convert:op_type @1) @3 @4 @5 @6 @7)))) > + (simplify > + (cond_len_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4 @5 @6 @7) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 @1 (convert:op_type @2) @4 @5 @6 @7)))) > + (simplify > + (cond_len_op @0 @1 @2 (convert? (vec_cond @0 @3 @4)) @5 @6 @7) > + (with { tree op_type = TREE_TYPE (@1); } > + (if (element_precision (type) == element_precision (op_type)) > + (cond_len_op @0 @1 @2 (convert:op_type @3) @5 @6 @7))))) > > /* Detect simplication for a conditional reduction where > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c > new file mode 100644 > index 00000000000..29e55705a7a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c > @@ -0,0 +1,20 @@ > +/* { dg-do compile } */ > +/* { dg-options { -O3 -march=rv64gcv_zvl256b -mabi=lp64d } } */ > + > +short d[19]; > +_Bool e[100][19][19]; > +_Bool f[10000]; > + > +int main() > +{ > + for (long g = 0; g < 19; ++g) > + d[g] = 3; > + _Bool(*h)[19][19] = e; > + for (short g = 0; g < 9; g++) > + for (int i = 4; i < 16; i += 3) > + f[i * 9 + g] = d[i] ? d[i] : h[g][i][2]; > + for (long i = 120; i < 122; ++i) > + __builtin_printf("%d\n", f[i]); > +} > + > +/* { dg-final { scan-assembler-times {vmv.v.i\s*v[0-9]+,0} 3 } } */ > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc > index 57992b6deca..c0c7013c817 100644 > --- a/gcc/tree-if-conv.cc > +++ b/gcc/tree-if-conv.cc > @@ -124,6 +124,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-vectorizer.h" > #include "tree-eh.h" > #include "cgraph.h" > +#include "tree-dfa.h" > > /* For lang_hooks.types.type_for_mode. */ > #include "langhooks.h" > @@ -2453,11 +2454,14 @@ mask_exists (int size, const vec<int> &vec) > that does so. */ > > static gimple * > -predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask) > +predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask, > + tree override_lhs = NULL_TREE) > { > gcall *new_stmt; > > - tree lhs = gimple_assign_lhs (stmt); > + tree lhs = override_lhs; > + if (!lhs) > + lhs = gimple_assign_lhs (stmt); > tree rhs = gimple_assign_rhs1 (stmt); > tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs; > mark_addressable (ref); > @@ -2789,11 +2793,52 @@ predicate_statements (loop_p loop) > vect_masks.safe_push (mask); > } > if (gimple_assign_single_p (stmt)) > - new_stmt = predicate_load_or_store (&gsi, stmt, mask); > - else > - new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names); > + { > + bool target_has_else > + = TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO; > + tree lhs = gimple_get_lhs (stmt); > + > + bool is_load = TREE_CODE (lhs) == SSA_NAME; > + > + gimple_seq stmts2 = NULL; > + tree tmplhs = is_load && target_has_else > + ? make_temp_ssa_name (TREE_TYPE (lhs), NULL, > + "_ifc_") : lhs; > + gimple *call_stmt > + = predicate_load_or_store (&gsi, stmt, mask, tmplhs); > + > + gimple_seq_add_stmt (&stmts2, call_stmt); > + > + if (lhs != tmplhs) > + ssa_names.add (tmplhs); > > - gsi_replace (&gsi, new_stmt, true); > + if (is_load && target_has_else) > + { > + unsigned nops = gimple_call_num_args (call_stmt); > + tree *ops = XALLOCAVEC (tree, nops); > + > + for (unsigned i = 0; i < nops; i++) > + ops[i] = gimple_call_arg (call_stmt, i); > + > + tree els_operand > + = targetm.preferred_else_value (IFN_MASK_LOAD, > + TREE_TYPE (lhs), > + nops, ops); > + > + tree rhs > + = fold_build_cond_expr (TREE_TYPE (tmplhs), > + mask, tmplhs, els_operand); > + gassign *cond_stmt > + = gimple_build_assign (gimple_get_lhs (stmt), rhs); > + gimple_seq_add_stmt (&stmts2, cond_stmt); > + } > + gsi_replace_with_seq (&gsi, stmts2, true); > + } > + else > + { > + new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names); > + gsi_replace (&gsi, new_stmt, true); > + } > } > else if (((lhs = gimple_assign_lhs (stmt)), true) > && (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > index 57085488722..65e6f91fe8b 100644 > --- a/gcc/tree-ssa-math-opts.cc > +++ b/gcc/tree-ssa-math-opts.cc > @@ -3193,6 +3193,14 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree > op2) > /* Follow all SSA edges so that we generate FMS, FNMA and FNMS > regardless of where the negation occurs. */ > gimple *orig_stmt = gsi_stmt (gsi); > + if (fold_stmt (&gsi, follow_all_ssa_edges)) > + { > + if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi))) > + gcc_unreachable (); > + update_stmt (gsi_stmt (gsi)); > + } > + /* Fold the result again. */ > + orig_stmt = gsi_stmt (gsi); > if (fold_stmt (&gsi, follow_all_ssa_edges)) > { > if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi))) > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)