Gentle ping; it would be appreciated if anyone could help review this.
We hope this patch does not miss GCC 15, so that APX support is complete.

Kong, Lingling <lingling.k...@intel.com> wrote on Thu, Nov 14, 2024 at 09:50:

>
> Hi,
>
> Many thanks to Richard for the suggestion that a conditional load is like a 
> scalar instance of maskload_optab.  So this version uses the maskload and 
> maskstore optabs to expand and generate cfcmov in the ifcvt pass.
>
> All the changes passed bootstrap & regtest on x86-64-pc-linux-gnu.
> We also tested SPEC with SDE and the runtime tests passed.
>
> Ok for trunk?
>
> The APX CFCMOV[1] feature implements conditional faulting, which means that all 
> memory faults are suppressed when the condition code evaluates to false while 
> loading or storing a memory operand.  With it, a conditional move can load or 
> store a memory operand that may trap or fault.
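>
> As a purely illustrative sketch (not part of this patch or its testsuite,
> and the function name is invented), the kind of conditional load this
> enables is:
>
>   /* With CFCMOV, the branch below can be if-converted even though *p
>      may trap or fault: the fault is suppressed whenever C is false.  */
>   long
>   cond_load (long *p, long fallback, int c)
>   {
>     long x = fallback;
>     if (c)
>       x = *p;   /* load from a possibly-faulting address */
>     return x;
>   }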
>
> In the middle end, we currently do not support a conditional move if we know 
> that a load from A or B could trap or fault.  To enable CFCMOV, we expand such 
> cases through maskload and maskstore.
>
> A fault-suppressing conditional move for a conditional memory store will not 
> move any arithmetic calculations.  For a conditional memory load, only the 
> case of one may-trap memory operand and one non-trapping, non-memory operand 
> is supported for now.
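>
> As another illustration (again not taken from the patch or its testsuite;
> the function name is made up), the conditional-store shape handled here is:
>
>   /* "if (test) *x = a; else skip": the store to *x may trap or fault,
>      but a scalar maskstore / cfcmov suppresses the fault when C is
>      false, so no store speculation is introduced.  */
>   void
>   cond_store (long *x, long a, int c)
>   {
>     if (c)
>       *x = a;
>   }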
>
> [1]. https://www.intel.com/content/www/us/en/developer/articles/technical/advanced-performance-extensions-apx.html
>
> gcc/ChangeLog:
>
>         * ifcvt.cc (can_use_scalar_mask_store): New function for conditional
>         faulting movcc for store.
>         (can_use_scalar_mask_load_store): New function for conditional
>         faulting movcc for load or store.
>         (noce_try_cmove_arith): Try to convert to conditional faulting
>         movcc.
>         (noce_process_if_block): Ditto.
>         * optabs.cc (emit_conditional_move): Handle cfmovcc.
>         (emit_conditional_move_1): Ditto.
> ---
>  gcc/ifcvt.cc  | 105 +++++++++++++++++++++++++++++++++++++++++++++-----
>  gcc/optabs.cc |  20 ++++++++++
>  2 files changed, 115 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> index 74f13a637b2..b3adee35ff5 100644
> --- a/gcc/ifcvt.cc
> +++ b/gcc/ifcvt.cc
> @@ -778,6 +778,8 @@ static bool noce_try_store_flag_mask (struct noce_if_info *);
>  static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
>                             rtx, rtx, rtx, rtx = NULL, rtx = NULL);
>  static bool noce_try_cmove (struct noce_if_info *);
> +static bool can_use_scalar_mask_store (rtx, rtx, rtx, bool);
> +static bool can_use_scalar_mask_load_store (struct noce_if_info *);
>  static bool noce_try_cmove_arith (struct noce_if_info *);
>  static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
>  static bool noce_try_minmax (struct noce_if_info *);
> @@ -2132,6 +2134,54 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
>    return true;
>  }
>
> +/* Return TRUE if we could convert "if (test) *x = a; else skip" to
> +   a scalar mask store and could do a conditional faulting movcc, i.e.
> +   x86 cfcmov, especially when the store to x may cause memory faults
> +   and in else_bb x == b.  */
> +
> +static bool
> +can_use_scalar_mask_store (rtx x, rtx a, rtx b, bool a_simple)
> +{
> +  gcc_assert (MEM_P (x));
> +
> +  machine_mode x_mode = GET_MODE (x);
> +  if (convert_optab_handler (maskstore_optab, x_mode,
> +                            x_mode) == CODE_FOR_nothing)
> +    return false;
> +
> +  if (!rtx_equal_p (x, b) || !may_trap_or_fault_p (x))
> +    return false;
> +  if (!a_simple || !register_operand (a, x_mode))
> +    return false;
> +
> +  return true;
> +}
> +
> +/* Return TRUE if the backend supports scalar maskload_optab/maskstore_optab,
> +   which suppress memory faults when loading or storing a memory operand
> +   while the condition code evaluates to false.  */
> +
> +static bool
> +can_use_scalar_mask_load_store (struct noce_if_info *if_info)
> +{
> +  rtx a = if_info->a;
> +  rtx b = if_info->b;
> +  rtx x = if_info->x;
> +
> +  if (!MEM_P (a) && !MEM_P (b))
> +    return false;
> +
> +  if (MEM_P (x))
> +    return can_use_scalar_mask_store (x, a, b, if_info->then_simple);
> +  else
> +    /* Return TRUE if the backend supports scalar maskload_optab, so we could
> +       convert "if (test) x = *a; else x = b;" or "if (test) x = a; else x = *b;"
> +       to a conditional faulting movcc, i.e. x86 cfcmov, especially when a load
> +       from a or b may cause memory faults.  */
> +    return convert_optab_handler (maskload_optab, GET_MODE (a),
> +                                 GET_MODE (a)) != CODE_FOR_nothing;
> +}
> +
>  /* Try more complex cases involving conditional_move.  */
>
>  static bool
> @@ -2171,7 +2221,17 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
>    /* ??? We could handle this if we knew that a load from A or B could
>       not trap or fault.  This is also true if we've already loaded
>       from the address along the path from ENTRY.  */
> -  else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
> +  /* Just wait until cse_not_expected, then convert to a conditional move
> +     on their addresses followed by a load.  */
> +  else if (may_trap_or_fault_p (a) && may_trap_or_fault_p (b))
> +    return false;
> +  /* Scalar maskload_optab/maskstore_optab implements conditional faulting,
> +     which means that if the condition code evaluates to false, all memory
> +     faults are suppressed when loading or storing a memory operand.  Thus
> +     we can do a conditional move with a memory operand that may trap or
> +     fault.  */
> +  else if ((may_trap_or_fault_p (a) ^ may_trap_or_fault_p (b))
> +          && !can_use_scalar_mask_load_store (if_info))
>      return false;
>
>    /* if (test) x = a + b; else x = c - d;
> @@ -2247,9 +2307,14 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
>    /* If either operand is complex, load it into a register first.
>       The best way to do this is to copy the original insn.  In this
>       way we preserve any clobbers etc that the insn may have had.
> -     This is of course not possible in the IS_MEM case.  */
> +     This is of course not possible in the IS_MEM case.
> +     If a load or store operand may trap or fault, we should not hoist
> +     the load or store; otherwise the memory fault could not be suppressed
> +     and it would be a normal arithmetic insn instead of a conditional
> +     faulting movcc.  */
>
> -  if (! general_operand (a, GET_MODE (a)) || tmp_a)
> +  if (! may_trap_or_fault_p (a)
> +      && (! general_operand (a, GET_MODE (a)) || tmp_a))
>      {
>
>        if (is_mem)
> @@ -2278,7 +2343,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
>         }
>      }
>
> -  if (! general_operand (b, GET_MODE (b)) || tmp_b)
> +  if (! may_trap_or_fault_p (b)
> +      && (! general_operand (b, GET_MODE (b)) || tmp_b))
>      {
>        if (is_mem)
>         {
> @@ -4210,12 +4276,31 @@ noce_process_if_block (struct noce_if_info *if_info)
>      }
>
>    if (!set_b && MEM_P (orig_x))
> -    /* We want to avoid store speculation to avoid cases like
> -        if (pthread_mutex_trylock(mutex))
> -          ++global_variable;
> -       Rather than go to much effort here, we rely on the SSA optimizers,
> -       which do a good enough job these days.  */
> -    return false;
> +    {
> +      /* When the target supports conditional faulting movcc, i.e. x86 cfcmov,
> +        we can do a conditional mem store for "if (...) *x = a; else skip"
> +        via maskstore_optab, where x may trap or fault.  */
> +      if ((convert_optab_handler (maskstore_optab, GET_MODE (orig_x),
> +                                 GET_MODE (orig_x)) != CODE_FOR_nothing)
> +         && HAVE_conditional_move
> +         && may_trap_or_fault_p (orig_x)
> +         && register_operand (a, GET_MODE (orig_x)))
> +       {
> +         x = orig_x;
> +         if_info->x = x;
> +         if (noce_try_cmove_arith (if_info))
> +           goto success;
> +         else
> +           return false;
> +       }
> +      /* We want to avoid store speculation to avoid cases like
> +          if (pthread_mutex_trylock(mutex))
> +            ++global_variable;
> +        Rather than go to much effort here, we rely on the SSA optimizers,
> +        which do a good enough job these days.  */
> +      else
> +       return false;
> +    }
>
>    if (noce_try_move (if_info))
>      goto success;
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 03ef0c5d81d..524c766d336 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -5085,6 +5085,16 @@ emit_conditional_move (rtx target, struct rtx_comparison comp,
>
>    icode = direct_optab_handler (movcc_optab, mode);
>
> +  if (may_trap_or_fault_p (target) && MEM_P (target)
> +      && convert_optab_handler (maskstore_optab, mode,
> +                               mode) != CODE_FOR_nothing)
> +    icode = convert_optab_handler (maskstore_optab, mode, mode);
> +  else if ((may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
> +          && (MEM_P (op2) || MEM_P (op3))
> +          && convert_optab_handler (maskload_optab,
> +                                    mode, mode) != CODE_FOR_nothing)
> +    icode = convert_optab_handler (maskload_optab, mode, mode);
> +
>    if (icode == CODE_FOR_nothing)
>      return NULL_RTX;
>
> @@ -5217,6 +5227,16 @@ emit_conditional_move_1 (rtx target, rtx comparison,
>
>    icode = direct_optab_handler (movcc_optab, mode);
>
> +  if (may_trap_or_fault_p (target) && MEM_P (target)
> +      && convert_optab_handler (maskstore_optab, mode,
> +                               mode) != CODE_FOR_nothing)
> +    icode = convert_optab_handler (maskstore_optab, mode, mode);
> +  else if ((may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
> +           && (MEM_P (op2) || MEM_P (op3))
> +           && convert_optab_handler (maskload_optab,
> +                                     mode, mode) != CODE_FOR_nothing)
> +    icode = convert_optab_handler (maskload_optab, mode, mode);
> +
>    if (icode == CODE_FOR_nothing)
>      return NULL_RTX;
>
> --
> 2.31.1
>
