On Fri, 5 Jul 2024, Robin Dapp wrote:

> Hi,
> 
> in PR115336 we have the following
> 
>   vect_patt_391 = .MASK_LEN_GATHER_LOAD (_470, vect__59, 1, { 0, ... }, { 0, 
> ... }, _482, 0);
>   vect_iftmp.44 = vect_patt_391 | { 1, ... };
>   .MASK_LEN_STORE (vectp_f.45, 8B, { -1, ... }, _482, 0, vect_iftmp.44);
> 
> which assumes that a maskload sets the masked-out elements to zero.

To me this looks like mis-applying of match.pd:6083?

Applying pattern match.pd:6083, gimple-match-1.cc:45749
gimple_simplified to iftmp.0_62 = iftmp.0_61 | _219;
new phi replacement stmt
iftmp.0_62 = iftmp.0_61 | _219;

so originally it wasn't

  iftmp.0_61 = .MASK_LOAD (_260, 8B, _259);
  iftmp.0_62 = iftmp.0_61 | _219;

but sth like

  iftmp.0_62 = <mask> ? _219 : iftmp.0_61;

and iftmp.0_61 is zero-one-valued because it is _Bool.  Shouldn't
if-conversion always turn the COND_EXPR into a .COND_IOR here?


> RVV does not guarantee this and, unfortunately, leaves it to the
> hardware implementation to do basically anything it wants (even keep
> the previous value).  In the PR this leads to us reusing a previous
> vector register and stale, nonzero values causing an invalid result.
> 
> Based on a Richard Sandiford's feedback I started with the attached
> patch - it's more an RFC in its current shape and obviously not
> tested exhaustively.
> 
> The idea is:
>  - In ifcvt emit a VCOND_MASK (mask, load_result, preferred_else_value)
>    after a MASK_LOAD if the target requires it.
>  - Elide the VCOND_MASK when there is a COND_OP with a replacing else
>    value later.
> 
> Is this, generally, reasonable or is there a better way?
> 
> My initial idea was to add an else value to MASK_LOAD.  Richard's
> concern was, though, that this might make nonzero else values
> appear inexpensive when they are actually not.
> 
> Even though I, mechanically, added match.pd patterns to catch
> the most common cases (already at a point where a separate function
> maybe in gimple-match-exports? would make more sense), there is
> still significant code-quality fallout.
> The regressing cases are often of a form where the VCOND_MASK is
> not just a conversion away but rather hidden behind some unmasked
> operation.  I'm not sure if we could ever recognize everything that
> way without descending very deep.
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>       PR middle-end/115336
> 
>       * config/riscv/riscv.cc (riscv_preferred_else_value): Add
>       MASK_LOAD.
>       * config/riscv/riscv.h (TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO):
>       Set to true.
>       * defaults.h (TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO): New.
>       * doc/tm.texi: Document.
>       * doc/tm.texi.in: Document.
>       * match.pd: Add patterns to allow replacing the else value of a
>       VEC_COND.
>       * tree-if-conv.cc (predicate_load_or_store): Emit a VEC_COND
>       after a MASK_LOAD if the target does not guarantee zeroing.
>       (predicate_statements): Add temporary lhs argument.
>       * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Re-fold fold
>       result.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/riscv/rvv/autovec/pr115336.c: New test.
> ---
>  gcc/config/riscv/riscv.cc                     |   2 +
>  gcc/config/riscv/riscv.h                      |   2 +
>  gcc/defaults.h                                |   4 +
>  gcc/doc/tm.texi                               |   5 +
>  gcc/doc/tm.texi.in                            |   5 +
>  gcc/match.pd                                  | 125 +++++++++++++++++-
>  .../gcc.target/riscv/rvv/autovec/pr115336.c   |  20 +++
>  gcc/tree-if-conv.cc                           |  57 +++++++-
>  gcc/tree-ssa-math-opts.cc                     |   8 ++
>  9 files changed, 217 insertions(+), 11 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c
> 
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index c17141d909a..e10bc6824b9 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -11481,6 +11481,8 @@ riscv_preferred_else_value (unsigned ifn, tree 
> vectype, unsigned int nops,
>  {
>    if (riscv_v_ext_mode_p (TYPE_MODE (vectype)))
>      return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype));
> +  else if (ifn == IFN_MASK_LOAD)
> +    return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype));
>  
>    return default_preferred_else_value (ifn, vectype, nops, ops);
>  }
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index 57910eecd3e..dc15eb5e60f 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -1264,4 +1264,6 @@ extern void riscv_remove_unneeded_save_restore_calls 
> (void);
>  /* Check TLS Descriptors mechanism is selected.  */
>  #define TARGET_TLSDESC (riscv_tls_dialect == TLS_DESCRIPTORS)
>  
> +#define TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO 1
> +
>  #endif /* ! GCC_RISCV_H */
> diff --git a/gcc/defaults.h b/gcc/defaults.h
> index 92f3e07f742..6ffbdaea229 100644
> --- a/gcc/defaults.h
> +++ b/gcc/defaults.h
> @@ -1457,6 +1457,10 @@ see the files COPYING3 and COPYING.RUNTIME 
> respectively.  If not, see
>  #define DWARF_VERSION_DEFAULT 5
>  #endif
>  
> +#ifndef TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO
> +#define TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO 0
> +#endif
> +
>  #ifndef USED_FOR_TARGET
>  /* Done this way to keep gengtype happy.  */
>  #if BITS_PER_UNIT == 8
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index be5543b72f8..2fdebe7fd21 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -12683,6 +12683,11 @@ maintainer is familiar with.
>  
>  @end defmac
>  
> +@defmac TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO
> +Bla
> +
> +@end defmac
> +
>  @deftypefn {Target Hook} bool TARGET_HAVE_SPECULATION_SAFE_VALUE (bool 
> @var{active})
>  This hook is used to determine the level of target support for
>   @code{__builtin_speculation_safe_value}.  If called with an argument
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 87a7f895174..276c38325dc 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -8097,6 +8097,11 @@ maintainer is familiar with.
>  
>  @end defmac
>  
> +@defmac TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO
> +Bla
> +
> +@end defmac
> +
>  @hook TARGET_HAVE_SPECULATION_SAFE_VALUE
>  
>  @hook TARGET_SPECULATION_SAFE_VALUE
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 3d0689c9312..2fdc14ef56f 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -9744,7 +9744,22 @@ and,
>  #endif
>  
>  /* Detect cases in which a VEC_COND_EXPR effectively replaces the
> -   "else" value of an IFN_COND_*.  */
> +   "else" value of an IFN_COND_* as well as when the IFN_COND_*
> +   replaces the else of the VEC_COND_EXPR.  */
> +(for cond_op (COND_UNARY)
> + (simplify
> +  (cond_op @0 (view_convert? (vec_cond @0 @1 @2)) @3)
> +  (with { tree op_type = TREE_TYPE (@3); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 (view_convert:op_type @1) @3)))))
> +
> +(for cond_len_op (COND_LEN_UNARY)
> + (simplify
> +  (cond_len_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5)
> +  (with { tree op_type = TREE_TYPE (@3); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 (view_convert:op_type @1) @3 @4 @5)))))
> +
>  (for cond_op (COND_BINARY)
>   (simplify
>    (vec_cond @0 (view_convert? (cond_op @0 @1 @2 @3)) @4)
> @@ -9756,7 +9771,27 @@ and,
>    (with { tree op_type = TREE_TYPE (@5); }
>     (if (inverse_conditions_p (@0, @2)
>          && element_precision (type) == element_precision (op_type))
> -    (view_convert (cond_op @2 @3 @4 (view_convert:op_type @1)))))))
> +    (view_convert (cond_op @2 @3 @4 (view_convert:op_type @1))))))
> + (simplify
> +  (cond_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4)
> +  (with { tree op_type = TREE_TYPE (@3); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 (view_convert:op_type @1) @3 @4))))
> + (simplify
> +  (cond_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 @1 (view_convert:op_type @2) @4))))
> + (simplify
> +  (cond_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4)
> +  (with { tree op_type = TREE_TYPE (@3); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 (convert:op_type @1) @3 @4))))
> + (simplify
> +  (cond_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 @1 (convert:op_type @2) @4)))))
>  
>  /* Same for ternary operations.  */
>  (for cond_op (COND_TERNARY)
> @@ -9770,7 +9805,37 @@ and,
>    (with { tree op_type = TREE_TYPE (@6); }
>     (if (inverse_conditions_p (@0, @2)
>          && element_precision (type) == element_precision (op_type))
> -    (view_convert (cond_op @2 @3 @4 @5 (view_convert:op_type @1)))))))
> +    (view_convert (cond_op @2 @3 @4 @5 (view_convert:op_type @1))))))
> + (simplify
> +  (cond_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5)
> +  (with { tree op_type = TREE_TYPE (@3); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 (view_convert:op_type @1) @3 @4 @5))))
> + (simplify
> +  (cond_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4 @5)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 @1 (view_convert:op_type @2) @4 @5))))
> + (simplify
> +  (cond_op @0 @1 @2 (view_convert? (vec_cond @0 @3 @4)) @5)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 @1 @2 (view_convert:op_type @3) @5))))
> + (simplify
> +  (cond_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4 @5)
> +  (with { tree op_type = TREE_TYPE (@3); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 (convert:op_type @1) @3 @4 @5))))
> + (simplify
> +  (cond_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4 @5)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 @1 (convert:op_type @2) @4 @5))))
> + (simplify
> +  (cond_op @0 @1 @2 (convert? (vec_cond @0 @3 @4)) @5)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_op @0 @1 @2 (convert:op_type @3) @5)))))
>  
>  /* Detect cases in which a VEC_COND_EXPR effectively replaces the
>     "else" value of an IFN_COND_LEN_*.  */
> @@ -9785,7 +9850,27 @@ and,
>    (with { tree op_type = TREE_TYPE (@5); }
>     (if (inverse_conditions_p (@0, @2)
>          && element_precision (type) == element_precision (op_type))
> -    (view_convert (cond_len_op @2 @3 @4 (view_convert:op_type @1) @6 @7))))))
> +    (view_convert (cond_len_op @2 @3 @4 (view_convert:op_type @1) @6 @7)))))
> + (simplify
> +  (cond_len_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6)
> +  (with { tree op_type = TREE_TYPE (@3); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 (view_convert:op_type @1) @3 @4 @5 @6))))
> + (simplify
> +  (cond_len_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4 @5 @6)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 @1 (view_convert:op_type @2) @4 @5 @6))))
> + (simplify
> +  (cond_len_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6)
> +  (with { tree op_type = TREE_TYPE (@3); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 (convert:op_type @1) @3 @4 @5 @6))))
> + (simplify
> +  (cond_len_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4 @5 @6)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 @1 (convert:op_type @2) @4 @5 @6)))))
>  
>  /* Same for ternary operations.  */
>  (for cond_len_op (COND_LEN_TERNARY)
> @@ -9799,7 +9884,37 @@ and,
>    (with { tree op_type = TREE_TYPE (@6); }
>     (if (inverse_conditions_p (@0, @2)
>          && element_precision (type) == element_precision (op_type))
> -    (view_convert (cond_len_op @2 @3 @4 @5 (view_convert:op_type @1) @7 
> @8))))))
> +    (view_convert (cond_len_op @2 @3 @4 @5 (view_convert:op_type @1) @7 
> @8)))))
> + (simplify
> +  (cond_len_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6 @7)
> +  (with { tree op_type = TREE_TYPE (@3); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 (view_convert:op_type @1) @3 @4 @5 @6 @7))))
> + (simplify
> +  (cond_len_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4 @5 @6 @7)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 @1 (view_convert:op_type @2) @4 @5 @6 @7))))
> + (simplify
> +  (cond_len_op @0 @1 @2 (view_convert? (vec_cond @0 @3 @4)) @5 @6 @7)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 @1 @2 (view_convert:op_type @3) @5 @6 @7))))
> + (simplify
> +  (cond_len_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6 @7)
> +  (with { tree op_type = TREE_TYPE (@3); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 (convert:op_type @1) @3 @4 @5 @6 @7))))
> + (simplify
> +  (cond_len_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4 @5 @6 @7)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 @1 (convert:op_type @2) @4 @5 @6 @7))))
> + (simplify
> +  (cond_len_op @0 @1 @2 (convert? (vec_cond @0 @3 @4)) @5 @6 @7)
> +  (with { tree op_type = TREE_TYPE (@1); }
> +   (if (element_precision (type) == element_precision (op_type))
> +    (cond_len_op @0 @1 @2 (convert:op_type @3) @5 @6 @7)))))
>  
>  /* Detect simplication for a conditional reduction where
>  
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c
> new file mode 100644
> index 00000000000..29e55705a7a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options { -O3 -march=rv64gcv_zvl256b -mabi=lp64d  } } */
> +
> +short d[19];
> +_Bool e[100][19][19];
> +_Bool f[10000];
> +
> +int main()
> +{
> +  for (long g = 0; g < 19; ++g)
> +    d[g] = 3;
> +  _Bool(*h)[19][19] = e;
> +  for (short g = 0; g < 9; g++)
> +    for (int i = 4; i < 16; i += 3)
> +      f[i * 9 + g] = d[i] ? d[i] : h[g][i][2];
> +  for (long i = 120; i < 122; ++i)
> +    __builtin_printf("%d\n", f[i]);
> +}
> +
> +/* { dg-final { scan-assembler-times {vmv.v.i\s*v[0-9]+,0} 3 } } */
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index 57992b6deca..c0c7013c817 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -124,6 +124,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-vectorizer.h"
>  #include "tree-eh.h"
>  #include "cgraph.h"
> +#include "tree-dfa.h"
>  
>  /* For lang_hooks.types.type_for_mode.  */
>  #include "langhooks.h"
> @@ -2453,11 +2454,14 @@ mask_exists (int size, const vec<int> &vec)
>     that does so.  */
>  
>  static gimple *
> -predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
> +predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask,
> +                      tree override_lhs = NULL_TREE)
>  {
>    gcall *new_stmt;
>  
> -  tree lhs = gimple_assign_lhs (stmt);
> +  tree lhs = override_lhs;
> +  if (!lhs)
> +    lhs = gimple_assign_lhs (stmt);
>    tree rhs = gimple_assign_rhs1 (stmt);
>    tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
>    mark_addressable (ref);
> @@ -2789,11 +2793,52 @@ predicate_statements (loop_p loop)
>                 vect_masks.safe_push (mask);
>               }
>             if (gimple_assign_single_p (stmt))
> -             new_stmt = predicate_load_or_store (&gsi, stmt, mask);
> -           else
> -             new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
> +             {
> +               bool target_has_else
> +                 = TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO;
> +               tree lhs = gimple_get_lhs (stmt);
> +
> +               bool is_load = TREE_CODE (lhs) == SSA_NAME;
> +
> +               gimple_seq stmts2 = NULL;
> +               tree tmplhs = is_load && target_has_else
> +                 ? make_temp_ssa_name (TREE_TYPE (lhs), NULL,
> +                                       "_ifc_") : lhs;
> +               gimple *call_stmt
> +                 = predicate_load_or_store (&gsi, stmt, mask, tmplhs);
> +
> +               gimple_seq_add_stmt (&stmts2, call_stmt);
> +
> +               if (lhs != tmplhs)
> +                 ssa_names.add (tmplhs);
>  
> -           gsi_replace (&gsi, new_stmt, true);
> +               if (is_load && target_has_else)
> +                 {
> +                   unsigned nops = gimple_call_num_args (call_stmt);
> +                   tree *ops = XALLOCAVEC (tree, nops);
> +
> +                   for (unsigned i = 0; i < nops; i++)
> +                     ops[i] = gimple_call_arg (call_stmt, i);
> +
> +                   tree els_operand
> +                     = targetm.preferred_else_value (IFN_MASK_LOAD,
> +                                                     TREE_TYPE (lhs),
> +                                                     nops, ops);
> +
> +                   tree rhs
> +                     = fold_build_cond_expr (TREE_TYPE (tmplhs),
> +                                             mask, tmplhs, els_operand);
> +                   gassign *cond_stmt
> +                     = gimple_build_assign (gimple_get_lhs (stmt), rhs);
> +                   gimple_seq_add_stmt (&stmts2, cond_stmt);
> +                 }
> +               gsi_replace_with_seq (&gsi, stmts2, true);
> +             }
> +           else
> +             {
> +               new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
> +               gsi_replace (&gsi, new_stmt, true);
> +             }
>           }
>         else if (((lhs = gimple_assign_lhs (stmt)), true)
>                  && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 57085488722..65e6f91fe8b 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -3193,6 +3193,14 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree 
> op2)
>        /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
>        regardless of where the negation occurs.  */
>        gimple *orig_stmt = gsi_stmt (gsi);
> +      if (fold_stmt (&gsi, follow_all_ssa_edges))
> +     {
> +       if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
> +         gcc_unreachable ();
> +       update_stmt (gsi_stmt (gsi));
> +     }
> +      /* Fold the result again.  */
> +      orig_stmt = gsi_stmt (gsi);
>        if (fold_stmt (&gsi, follow_all_ssa_edges))
>       {
>         if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to