From: Lingling Kong <lingling.k...@intel.com>

Hi,

The APX CFCMOV feature implements conditionally faulting loads and
stores: when the condition code evaluates to false, any memory fault
that the load or store of the memory operand would raise is
suppressed.  With it, a conditional move may load or store a memory
operand that may trap or fault.
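
For example, with CFCMOV a branch like the following can be
if-converted even though the load may fault (an illustrative C
fragment, not taken from the patch or its testcases):

    int
    foo (int *p, int x, int c)
    {
      /* The load from p may fault when c is 0.  A conditionally
         faulting load suppresses that fault, so the branch can become
         a single conditional move instead of a compare-and-branch.  */
      return c ? *p : x;
    }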

In the middle end we currently give up on a conditional move if a
load from A or B could trap or fault.  To enable CFCMOV, we use
mask_load and mask_store as a proxy for the backend expander.  The
predicate of mask_load/mask_store is recognized as a comparison rtx
in the initial implementation.
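
For reference, the interface the expansion goes through (this just
restates what the new emit_mask_load_store sets up; the pattern names
follow the maskload$a$b / maskstore$a$b convention in optabs.def):

    maskload<mode><cond_mode>:  op0 = destination, op1 = value when
                                the predicate is true, op2 = comparison
                                predicate, op3 = value when false
    maskstore<mode><cond_mode>: op0 = destination memory, op1 = value
                                to store, op2 = comparison predicate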

The fault-suppressing conditional memory store does not move any
arithmetic computation into the conditional path.  The conditional
memory load currently only supports a conditional move between one
memory operand that may trap and one operand that neither traps nor
is a memory reference.
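
In source terms the handled shapes look like the small functions
below (a sketch only; the function names are made up, the shapes
mirror the comment on can_use_mask_load_store in the patch):

    /* "if (test) *x = a; else skip;"  -> mask_store  */
    void store_form (int *x, int a, int test) { if (test) *x = a; }

    /* "if (test) x = *a; else x = b;" -> mask_load  */
    int load_form1 (int *a, int b, int test) { return test ? *a : b; }

    /* "if (test) x = a; else x = *b;" -> mask_load  */
    int load_form2 (int a, int *b, int test) { return test ? a : *b; }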

As Richard suggested we postpone the patch to GCC 16, and we hope
someone who is more familiar with the RTL ifcvt pass can help review
the implementation.

Bootstrapped & regtested on x86_64-pc-linux-gnu and aarch64-linux-gnu.

gcc/ChangeLog:

        * ifcvt.cc (can_use_mask_load_store): New function to check
        whether a conditionally faulting load or store can be used.
        (noce_try_cmove_arith): Relax the may_trap_or_fault check on
        the operands and expand with the mask_load/mask_store optab
        when one of the cmove operands may trap or fault.
        (noce_process_if_block): Allow a dest that may trap or fault
        for the "if (...) *x = a; else skip;" scenario when the
        mask_store optab is available.
        * optabs.h (emit_mask_load_store): New declaration.
        * optabs.cc (emit_mask_load_store): New function to emit a
        conditional move with the mask_load/mask_store optabs.
---
 gcc/ifcvt.cc  | 110 ++++++++++++++++++++++++++++++++++++++++++--------
 gcc/optabs.cc | 103 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/optabs.h  |   3 ++
 3 files changed, 200 insertions(+), 16 deletions(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index a0c6575e4e4..1337495f7c4 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -785,6 +785,7 @@ static bool noce_try_store_flag_mask (struct noce_if_info *);
 static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
                            rtx, rtx, rtx, rtx = NULL, rtx = NULL);
 static bool noce_try_cmove (struct noce_if_info *);
+static bool can_use_mask_load_store (struct noce_if_info *);
 static bool noce_try_cmove_arith (struct noce_if_info *);
 static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
 static bool noce_try_minmax (struct noce_if_info *);
@@ -2136,6 +2137,39 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
   return true;
 }
 
+/* Return TRUE if the backend supports the scalar maskload_optab
+   or maskstore_optab, which suppress memory faults when trying to
+   load or store a memory operand and the condition code evaluates
+   to false.
+   Currently the following forms
+       "if (test) *x = a; else skip;" --> mask_store
+       "if (test) x = *a; else x = b;" --> mask_load
+       "if (test) x = a; else x = *b;" --> mask_load
+   are supported.  */
+
+static bool
+can_use_mask_load_store (struct noce_if_info *if_info)
+{
+  rtx b = if_info->b;
+  rtx x = if_info->x;
+  rtx cond = if_info->cond;
+
+  if (MEM_P (x))
+    {
+      if (convert_optab_handler (maskstore_optab, GET_MODE (x),
+                                GET_MODE (cond)) == CODE_FOR_nothing)
+       return false;
+
+      if (!rtx_equal_p (x, b) || !may_trap_or_fault_p (x))
+       return false;
+
+      return true;
+    }
+  else
+    return convert_optab_handler (maskload_optab, GET_MODE (x),
+                                 GET_MODE (cond)) != CODE_FOR_nothing;
+}
+
 /* Try more complex cases involving conditional_move.  */
 
 static bool
@@ -2155,6 +2189,9 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   enum rtx_code code;
   rtx cond = if_info->cond;
   rtx_insn *ifcvt_seq;
+  bool a_may_trap_or_fault = may_trap_or_fault_p (a);
+  bool b_may_trap_or_fault = may_trap_or_fault_p (b);
+  bool use_mask_load_store = false;
 
   /* A conditional move from two memory sources is equivalent to a
      conditional on their addresses followed by a load.  Don't do this
@@ -2171,11 +2208,22 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
       x = gen_reg_rtx (address_mode);
       is_mem = true;
     }
-
-  /* ??? We could handle this if we knew that a load from A or B could
-     not trap or fault.  This is also true if we've already loaded
-     from the address along the path from ENTRY.  */
-  else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
+  /* We cannot handle the case where both a and b may trap or
+     fault.  */
+  else if (a_may_trap_or_fault && b_may_trap_or_fault)
+    return false;
+  /* The scalar maskload_optab/maskstore_optab imply conditional
+     faulting: if the condition mask evaluates to false, all memory
+     faults are suppressed when loading or storing the memory operand.
+     So if a scalar mask_load or mask_store is available, we can do
+     the conversion when one of a/b may trap or fault.  */
+  else if (((MEM_P (a) && a_may_trap_or_fault
+            && !b_may_trap_or_fault)
+           || (MEM_P (b) && b_may_trap_or_fault
+               && !a_may_trap_or_fault))
+          && can_use_mask_load_store (if_info))
+    use_mask_load_store = true;
+  else if (a_may_trap_or_fault || b_may_trap_or_fault)
     return false;
 
   /* if (test) x = a + b; else x = c - d;
@@ -2216,6 +2264,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
          std::swap (insn_a, insn_b);
          std::swap (a_simple, b_simple);
          std::swap (then_bb, else_bb);
+         std::swap (a_may_trap_or_fault, b_may_trap_or_fault);
        }
     }
 
@@ -2251,9 +2300,14 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   /* If either operand is complex, load it into a register first.
      The best way to do this is to copy the original insn.  In this
      way we preserve any clobbers etc that the insn may have had.
-     This is of course not possible in the IS_MEM case.  */
+     This is of course not possible in the IS_MEM case.
+     If an operand is a load or store that may trap or fault, do not
+     hoist it into a separate insn; otherwise the memory fault can no
+     longer be suppressed and we end up with a plain arithmetic insn
+     instead of a conditionally faulting movcc.  */
 
-  if (! general_operand (a, GET_MODE (a)) || tmp_a)
+  if (!a_may_trap_or_fault
+      && (! general_operand (a, GET_MODE (a)) || tmp_a))
     {
 
       if (is_mem)
@@ -2282,7 +2336,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
        }
     }
 
-  if (! general_operand (b, GET_MODE (b)) || tmp_b)
+  if (!b_may_trap_or_fault
+      && (! general_operand (b, GET_MODE (b)) || tmp_b))
     {
       if (is_mem)
        {
@@ -2360,8 +2415,12 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
   else
     goto end_seq_and_fail;
 
-  target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),
-                           a, b);
+  if (use_mask_load_store)
+    target = emit_mask_load_store (x, code, XEXP (cond, 0),
+                                  XEXP (cond, 1), a, b);
+  else
+    target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0),
+                             XEXP (cond, 1), a, b);
 
   if (! target)
     goto end_seq_and_fail;
@@ -4214,12 +4273,31 @@ noce_process_if_block (struct noce_if_info *if_info)
     }
 
   if (!set_b && MEM_P (orig_x))
-    /* We want to avoid store speculation to avoid cases like
-        if (pthread_mutex_trylock(mutex))
-          ++global_variable;
-       Rather than go to much effort here, we rely on the SSA optimizers,
-       which do a good enough job these days.  */
-    return false;
+    {
+      /* When the target supports a scalar mask_store, e.g. x86 cfcmov,
+        we can do a conditional memory store for "if (...) *x = a; else skip"
+        where x may trap or fault.  */
+      if ((convert_optab_handler (maskstore_optab, GET_MODE (orig_x),
+                                 GET_MODE (cond)) != CODE_FOR_nothing)
+         && HAVE_conditional_move
+         && may_trap_or_fault_p (orig_x)
+         && register_operand (a, GET_MODE (orig_x)))
+       {
+         x = orig_x;
+         if_info->x = x;
+         if (noce_try_cmove_arith (if_info))
+           goto success;
+         else
+           return false;
+       }
+      /* We want to avoid store speculation to avoid cases like
+          if (pthread_mutex_trylock(mutex))
+            ++global_variable;
+        Rather than go to much effort here, we rely on the SSA optimizers,
+        which do a good enough job these days.  */
+      else
+       return false;
+    }
 
   if (noce_try_move (if_info))
     goto success;
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 5c9450f6145..86986128e03 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -5197,6 +5197,109 @@ emit_conditional_move_1 (rtx target, rtx comparison,
   return NULL_RTX;
 }
 
+/* Emit a conditional move instruction using the maskload or maskstore
+   optab, which allows one of the move source or dest to be a memory
+   that may trap or fault.  The input operands have the same meaning as
+   for emit_conditional_move, except the pred parameter can be a mask
+   predicate.  */
+
+rtx
+emit_mask_load_store (rtx target, enum rtx_code code, rtx cmp_a,
+                     rtx cmp_b, rtx vfalse, rtx vtrue, rtx pred)
+{
+  enum insn_code icode;
+
+  bool unsignedp = (code == LTU || code == GEU
+                   || code == LEU || code == GTU);
+
+  bool maskstore_p = MEM_P (target);
+  bool restore_stack = false;
+  saved_pending_stack_adjust save;
+  rtx_insn *last = get_last_insn ();
+
+  /* If pred is not given, prepare a compare insn from cmp_a and
+     cmp_b to use as the predicate.  */
+  if (pred == NULL_RTX)
+    {
+      if (! general_operand (cmp_a, GET_MODE (cmp_a))
+         || ! general_operand (cmp_b, GET_MODE (cmp_b)))
+       return NULL_RTX;
+
+      if (swap_commutative_operands_p (cmp_a, cmp_b))
+       {
+         std::swap (cmp_a, cmp_b);
+         code = swap_condition (code);
+       }
+
+      /* get_condition will prefer to generate LT and GT even if the old
+        comparison was against zero, so undo that canonicalization here
+        since comparisons against zero are cheaper.  */
+
+      if (code == LT && cmp_b == const1_rtx)
+       code = LE, cmp_b = const0_rtx;
+      else if (code == GT && cmp_b == constm1_rtx)
+       code = GE, cmp_b = const0_rtx;
+
+      code = unsignedp ? unsigned_condition (code) : code;
+      rtx comparison = simplify_gen_relational (code, VOIDmode,
+                                               VOIDmode, cmp_a, cmp_b);
+
+      if (!COMPARISON_P (comparison))
+       return NULL_RTX;
+
+      save_pending_stack_adjust (&save);
+      do_pending_stack_adjust ();
+
+      machine_mode cmode = VOIDmode;
+      prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1),
+                       GET_CODE (comparison), NULL_RTX, unsignedp,
+                       OPTAB_WIDEN, &comparison, &cmode);
+
+      if (!comparison)
+       restore_stack = true;
+
+      pred = comparison;
+    }
+
+  if (pred)
+    {
+      machine_mode op_mode = GET_MODE (target),
+                  cond_mode = GET_MODE (pred);
+
+      if (maskstore_p)
+       icode = convert_optab_handler (maskstore_optab, op_mode,
+                                      cond_mode);
+      else
+       icode = convert_optab_handler (maskload_optab, op_mode,
+                                      cond_mode);
+
+      if (icode != CODE_FOR_nothing)
+       {
+         class expand_operand ops[4];
+         int opnum = 3;
+
+         create_output_operand (&ops[0], target, op_mode);
+         create_input_operand (&ops[1], vtrue, op_mode);
+         create_fixed_operand (&ops[2], pred);
+
+         if (!maskstore_p)
+           {
+             create_input_operand (&ops[3], vfalse, op_mode);
+             opnum = 4;
+           }
+
+         if (maybe_expand_insn (icode, opnum, ops))
+           return target;
+       }
+    }
+
+  if (restore_stack)
+    {
+      delete_insns_since (last);
+      restore_pending_stack_adjust (&save);
+    }
+
+  return NULL_RTX;
+}
 
 /* Emit a conditional negate or bitwise complement using the
    negcc or notcc optabs if available.  Return NULL_RTX if such operations
diff --git a/gcc/optabs.h b/gcc/optabs.h
index a8b0e93d60b..babc2627845 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -292,6 +292,9 @@ extern void emit_indirect_jump (rtx);
 rtx emit_conditional_move (rtx, rtx_comparison, rtx, rtx, machine_mode, int);
 rtx emit_conditional_move (rtx, rtx, rtx, rtx, rtx, machine_mode);
 
+/* Emit a mask_load or mask_store operation.  */
+rtx emit_mask_load_store (rtx, rtx_code, rtx, rtx, rtx, rtx, rtx = NULL);
+
 /* Emit a conditional negate or bitwise complement operation.  */
 rtx emit_conditional_neg_or_complement (rtx, rtx_code, machine_mode, rtx,
                                         rtx, rtx);
-- 
2.31.1
