Ping, hopefully someone can help review this part

Hongyu Wang <hongyu.w...@intel.com> wrote on Wed, May 21, 2025 at 09:09:
>
> From: Lingling Kong <lingling.k...@intel.com>
>
> Hi,
>
> The APX CFCMOV feature implements conditionally faulting loads and
> stores: when the condition code evaluates to false, any memory fault
> from loading or storing the memory operand is suppressed.  This lets
> a conditional move load or store a memory operand that may trap or
> fault.
>
> In the middle end, ifcvt currently rejects a conditional move if a
> load from A or B could trap or fault.  To enable CFCMOV, we use the
> mask_load and mask_store optabs as a proxy for the backend expander.
> In this initial implementation the predicate of mask_load/mask_store
> is recognized as a comparison rtx.
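> 
> Whether a target provides these scalar optabs is queried through
> convert_optab_handler, keyed on the operand mode and the mode of the
> condition; a minimal sketch of such a check (the helper name here is
> made up for illustration, the real check is can_use_mask_load_store
> in the patch below):
> 
>   /* Sketch: does the target have a scalar conditionally faulting
>      store for operand mode MODE predicated on a comparison whose
>      mode is CCMODE?  */
>   static bool
>   have_scalar_mask_store_p (machine_mode mode, machine_mode ccmode)
>   {
>     return (convert_optab_handler (maskstore_optab, mode, ccmode)
>             != CODE_FOR_nothing);
>   }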
>
> A conditional (fault-suppressing) memory store does not move any
> arithmetic calculations.  A conditional memory load currently only
> supports the case where one operand is a memory reference that may
> trap and the other operand neither traps nor is a memory reference.
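> 
> For illustration, the supported shapes look roughly like the following
> C functions (a minimal sketch; the names are made up and are not taken
> from the patch or its testcases):
> 
>   /* mask_load:  x = test ? *a : b  */
>   int load_left (int test, int *a, int b) { return test ? *a : b; }
> 
>   /* mask_load:  x = test ? a : *b  */
>   int load_right (int test, int a, int *b) { return test ? a : *b; }
> 
>   /* mask_store: if (test) *x = a   */
>   void store_only (int test, int *x, int a) { if (test) *x = a; }
> 
> In each case the possibly faulting access sits on exactly one arm, and
> CFCMOV suppresses its fault whenever the condition is false.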
>
> As Richard suggested, we postponed the patch to GCC 16, and we hope
> someone more familiar with the RTL ifcvt pass can help review the
> implementation.
>
> Bootstrapped & regtested on x86_64-pc-linux-gnu and aarch64-linux-gnu.
>
> gcc/ChangeLog:
>
>         * ifcvt.cc (can_use_mask_load_store): New function to check
>         whether the target supports conditionally faulting load/store.
>         (noce_try_cmove_arith): Relax the may_trap_or_fault check on
>         the operands and expand with the mask_load/mask_store optab
>         when one of the cmove operands may trap or fault.
>         (noce_process_if_block): Allow a trap_or_fault dest for the
>         "if (...) *x = a; else skip" scenario when the mask_store
>         optab is available.
>         * optabs.h (emit_mask_load_store): New declaration.
>         * optabs.cc (emit_mask_load_store): New function to emit a
>         conditional move with the mask_load/mask_store optab.
> ---
>  gcc/ifcvt.cc  | 110 ++++++++++++++++++++++++++++++++++++++++++--------
>  gcc/optabs.cc | 103 ++++++++++++++++++++++++++++++++++++++++++++++
>  gcc/optabs.h  |   3 ++
>  3 files changed, 200 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> index a0c6575e4e4..1337495f7c4 100644
> --- a/gcc/ifcvt.cc
> +++ b/gcc/ifcvt.cc
> @@ -785,6 +785,7 @@ static bool noce_try_store_flag_mask (struct noce_if_info *);
>  static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
>                             rtx, rtx, rtx, rtx = NULL, rtx = NULL);
>  static bool noce_try_cmove (struct noce_if_info *);
> +static bool can_use_mask_load_store (struct noce_if_info *);
>  static bool noce_try_cmove_arith (struct noce_if_info *);
>  static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
>  static bool noce_try_minmax (struct noce_if_info *);
> @@ -2136,6 +2137,39 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
>    return true;
>  }
>
> +/* Return TRUE if the backend supports the scalar maskload_optab
> +   or maskstore_optab, which suppress memory faults when trying to
> +   load or store a memory operand and the condition code evaluates
> +   to false.
> +   Currently the following forms
> +       "if (test) *x = a; else skip;" --> mask_store
> +       "if (test) x = *a; else x = b;" --> mask_load
> +       "if (test) x = a; else x = *b;" --> mask_load
> +   are supported.  */
> +
> +static bool
> +can_use_mask_load_store (struct noce_if_info *if_info)
> +{
> +  rtx b = if_info->b;
> +  rtx x = if_info->x;
> +  rtx cond = if_info->cond;
> +
> +  if (MEM_P (x))
> +    {
> +      if (convert_optab_handler (maskstore_optab, GET_MODE (x),
> +                                GET_MODE (cond)) == CODE_FOR_nothing)
> +       return false;
> +
> +      if (!rtx_equal_p (x, b) || !may_trap_or_fault_p (x))
> +       return false;
> +
> +      return true;
> +    }
> +  else
> +    return convert_optab_handler (maskload_optab, GET_MODE (x),
> +                                 GET_MODE (cond)) != CODE_FOR_nothing;
> +}
> +
>  /* Try more complex cases involving conditional_move.  */
>
>  static bool
> @@ -2155,6 +2189,9 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
>    enum rtx_code code;
>    rtx cond = if_info->cond;
>    rtx_insn *ifcvt_seq;
> +  bool a_may_trap_or_fault = may_trap_or_fault_p (a);
> +  bool b_may_trap_or_fault = may_trap_or_fault_p (b);
> +  bool use_mask_load_store = false;
>
>    /* A conditional move from two memory sources is equivalent to a
>       conditional on their addresses followed by a load.  Don't do this
> @@ -2171,11 +2208,22 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
>        x = gen_reg_rtx (address_mode);
>        is_mem = true;
>      }
> -
> -  /* ??? We could handle this if we knew that a load from A or B could
> -     not trap or fault.  This is also true if we've already loaded
> -     from the address along the path from ENTRY.  */
> -  else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
> +  /* We cannot handle the case where both a and b may trap or
> +     fault.  */
> +  else if (a_may_trap_or_fault && b_may_trap_or_fault)
> +    return false;
> +  /* The scalar maskload_optab/maskstore_optab imply conditional
> +     faulting: if the condition mask evaluates to false, all memory
> +     faults are suppressed when loading or storing a memory operand.
> +     So if a scalar mask_load or mask_store is enabled, we can do the
> +     conversion when one of a/b may trap or fault.  */
> +  else if (((MEM_P (a) && a_may_trap_or_fault
> +            && !b_may_trap_or_fault)
> +           || (MEM_P (b) && b_may_trap_or_fault
> +               && !a_may_trap_or_fault))
> +          && can_use_mask_load_store (if_info))
> +    use_mask_load_store = true;
> +  else if (a_may_trap_or_fault || b_may_trap_or_fault)
>      return false;
>
>    /* if (test) x = a + b; else x = c - d;
> @@ -2216,6 +2264,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
>           std::swap (insn_a, insn_b);
>           std::swap (a_simple, b_simple);
>           std::swap (then_bb, else_bb);
> +         std::swap (a_may_trap_or_fault, b_may_trap_or_fault);
>         }
>      }
>
> @@ -2251,9 +2300,14 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
>    /* If either operand is complex, load it into a register first.
>       The best way to do this is to copy the original insn.  In this
>       way we preserve any clobbers etc that the insn may have had.
> -     This is of course not possible in the IS_MEM case.  */
> +     This is of course not possible in the IS_MEM case.
> +     When the load or store of an operand may trap or fault, do not
> +     hoist that load or store; otherwise the memory fault cannot be
> +     suppressed and we end up with a normal arithmetic insn instead
> +     of a conditionally faulting movcc.  */
>
> -  if (! general_operand (a, GET_MODE (a)) || tmp_a)
> +  if (!a_may_trap_or_fault
> +      && (! general_operand (a, GET_MODE (a)) || tmp_a))
>      {
>
>        if (is_mem)
> @@ -2282,7 +2336,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
>         }
>      }
>
> -  if (! general_operand (b, GET_MODE (b)) || tmp_b)
> +  if (!b_may_trap_or_fault
> +      && (! general_operand (b, GET_MODE (b)) || tmp_b))
>      {
>        if (is_mem)
>         {
> @@ -2360,8 +2415,12 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
>    else
>      goto end_seq_and_fail;
>
> -  target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),
> -                           a, b);
> +  if (use_mask_load_store)
> +    target = emit_mask_load_store (x, code, XEXP (cond, 0),
> +                                  XEXP (cond, 1), a, b);
> +  else
> +    target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0),
> +                             XEXP (cond, 1), a, b);
>
>    if (! target)
>      goto end_seq_and_fail;
> @@ -4214,12 +4273,31 @@ noce_process_if_block (struct noce_if_info *if_info)
>      }
>
>    if (!set_b && MEM_P (orig_x))
> -    /* We want to avoid store speculation to avoid cases like
> -        if (pthread_mutex_trylock(mutex))
> -          ++global_variable;
> -       Rather than go to much effort here, we rely on the SSA optimizers,
> -       which do a good enough job these days.  */
> -    return false;
> +    {
> +      /* When the target supports a scalar mask_store, e.g. x86 cfcmov,
> +        we can do a conditional mem store for "if (...) *x = a; else skip"
> +        where x may trap or fault.  */
> +      if ((convert_optab_handler (maskstore_optab, GET_MODE (orig_x),
> +                                 GET_MODE (cond)) != CODE_FOR_nothing)
> +         && HAVE_conditional_move
> +         && may_trap_or_fault_p (orig_x)
> +         && register_operand (a, GET_MODE (orig_x)))
> +       {
> +         x = orig_x;
> +         if_info->x = x;
> +         if (noce_try_cmove_arith (if_info))
> +           goto success;
> +         else
> +           return false;
> +       }
> +      /* We want to avoid store speculation to avoid cases like
> +          if (pthread_mutex_trylock(mutex))
> +            ++global_variable;
> +        Rather than go to much effort here, we rely on the SSA optimizers,
> +        which do a good enough job these days.  */
> +      else
> +       return false;
> +    }
>
>    if (noce_try_move (if_info))
>      goto success;
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 5c9450f6145..86986128e03 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -5197,6 +5197,109 @@ emit_conditional_move_1 (rtx target, rtx comparison,
>    return NULL_RTX;
>  }
>
> +/* Emit a conditional move using the mask_load or mask_store optab, which
> +   allows one of the move source or dest to be a memory that may trap or
> +   fault.  The input operands have the same meaning as those of
> +   emit_conditional_move, except that PRED can be a mask predicate.  */
> +
> +rtx
> +emit_mask_load_store (rtx target, enum rtx_code code, rtx cmp_a,
> +                     rtx cmp_b, rtx vfalse, rtx vtrue, rtx pred)
> +{
> +  enum insn_code icode;
> +
> +  bool unsignedp = (code == LTU || code == GEU
> +                   || code == LEU || code == GTU);
> +
> +  bool maskstore_p = MEM_P (target);
> +  bool restore_stack = false;
> +  saved_pending_stack_adjust save;
> +  rtx_insn *last = get_last_insn ();
> +
> +  /* If pred doesn't exist, prepare compare insn using cmp_a and
> +     cmp_b as predicate.  */
> +  if (pred == NULL_RTX)
> +    {
> +      if (! general_operand (cmp_a, GET_MODE (cmp_a))
> +         || ! general_operand (cmp_b, GET_MODE (cmp_b)))
> +       return NULL_RTX;
> +
> +      if (swap_commutative_operands_p (cmp_a, cmp_b))
> +       {
> +         std::swap (cmp_a, cmp_b);
> +         code = swap_condition (code);
> +       }
> +
> +      /* get_condition will prefer to generate LT and GT even if the old
> +        comparison was against zero, so undo that canonicalization here
> +        since comparisons against zero are cheaper.  */
> +
> +      if (code == LT && cmp_b == const1_rtx)
> +       code = LE, cmp_b = const0_rtx;
> +      else if (code == GT && cmp_b == constm1_rtx)
> +       code = GE, cmp_b = const0_rtx;
> +
> +      code = unsignedp ? unsigned_condition (code) : code;
> +      rtx comparison = simplify_gen_relational (code, VOIDmode,
> +                                               VOIDmode, cmp_a, cmp_b);
> +
> +      if (!COMPARISON_P (comparison))
> +       return NULL_RTX;
> +
> +      save_pending_stack_adjust (&save);
> +      do_pending_stack_adjust ();
> +
> +      machine_mode cmode = VOIDmode;
> +      prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1),
> +                       GET_CODE (comparison), NULL_RTX, unsignedp,
> +                       OPTAB_WIDEN, &comparison, &cmode);
> +
> +      if (!comparison)
> +       restore_stack = true;
> +
> +      pred = comparison;
> +    }
> +
> +  if (pred)
> +    {
> +      machine_mode op_mode = GET_MODE (target),
> +                  cond_mode = GET_MODE (pred);
> +
> +      if (maskstore_p)
> +       icode = convert_optab_handler (maskstore_optab, op_mode,
> +                                      cond_mode);
> +      else
> +       icode = convert_optab_handler (maskload_optab, op_mode,
> +                                      cond_mode);
> +
> +      if (icode != CODE_FOR_nothing)
> +       {
> +         class expand_operand ops[4];
> +         int opnum = 3;
> +
> +         create_output_operand (&ops[0], target, op_mode);
> +         create_input_operand (&ops[1], vtrue, op_mode);
> +         create_fixed_operand (&ops[2], pred);
> +
> +         if (!maskstore_p)
> +           {
> +             create_input_operand (&ops[3], vfalse, op_mode);
> +             opnum = 4;
> +           }
> +
> +         if (maybe_expand_insn (icode, opnum, ops))
> +           return target;
> +       }
> +    }
> +
> +  if (restore_stack)
> +    {
> +      delete_insns_since (last);
> +      restore_pending_stack_adjust (&save);
> +    }
> +
> +  return NULL_RTX;
> +}
>
>  /* Emit a conditional negate or bitwise complement using the
>     negcc or notcc optabs if available.  Return NULL_RTX if such operations
> diff --git a/gcc/optabs.h b/gcc/optabs.h
> index a8b0e93d60b..babc2627845 100644
> --- a/gcc/optabs.h
> +++ b/gcc/optabs.h
> @@ -292,6 +292,9 @@ extern void emit_indirect_jump (rtx);
>  rtx emit_conditional_move (rtx, rtx_comparison, rtx, rtx, machine_mode, int);
>  rtx emit_conditional_move (rtx, rtx, rtx, rtx, rtx, machine_mode);
>
> +/* Emit a mask_load or mask_store operation.  */
> +rtx emit_mask_load_store (rtx, rtx_code, rtx, rtx, rtx, rtx, rtx = NULL);
> +
>  /* Emit a conditional negate or bitwise complement operation.  */
>  rtx emit_conditional_neg_or_complement (rtx, rtx_code, machine_mode, rtx,
>                                          rtx, rtx);
> --
> 2.31.1
>
