From: Lingling Kong <[email protected]>
Hi,

Thanks to Richard's review, the v5 patch contains the following changes:

1. Separate the maskload/maskstore emission out of noce_emit_cmove and
   add a new function emit_mask_load_store in optabs.cc.
2. Follow the operand order of the maskload and maskstore optabs and
   take cond as the predicate operand with VOIDmode.
3. Cache the may_trap_or_fault_p results and correct the logic to
   ensure that only one of the cmove source operands can be a
   may_trap_or_fault memory.

Bootstrapped & regtested on x86_64-pc-linux-gnu.
OK for trunk?

The APX CFCMOV feature implements conditionally faulting loads and
stores, which means that all memory faults are suppressed when the
condition code evaluates to false and a memory operand is loaded or
stored.  This allows a conditional move to load or store a memory
operand that may trap or fault.
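
As a rough illustration (not taken from the patch; the function and
variable names below are made up), this is the kind of C source the
feature lets us if-convert:

    int
    foo (int test, int *p, int x)
    {
      /* The load from *p may fault when !test, so without CFCMOV the
         branch cannot be turned into a conditional move.  A
         conditionally faulting load suppresses the fault when the
         condition is false.  */
      if (test)
        x = *p;
      return x;
    }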

In the middle-end we currently do not support a conditional move if a
load from A or B could trap or fault.  To enable CFCMOV, we use
maskload and maskstore as a proxy for the backend expander.  The
predicate of maskload/maskstore is recognized as a comparison rtx in
this initial implementation.

A fault-suppressing conditional move for a conditional memory store
does not move any arithmetic calculations.  For a conditional memory
load, only a conditional move between one memory operand that may trap
and one operand that neither traps nor is a memory reference is
supported for now.
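
The conditional-store case looks roughly like the following (again only
a sketch with made-up names).  Normally this is rejected to avoid store
speculation, but a conditionally faulting store leaves *p untouched and
does not fault when !test:

    void
    bar (int test, int *p, int a)
    {
      if (test)
        *p = a;   /* -> maskstore; no store and no fault when !test.  */
    }
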
gcc/ChangeLog:

	* ifcvt.cc (can_use_mask_load_store): New function to check
	whether a conditionally faulting load or store can be used.
	(noce_try_cmove_arith): Relax the may_trap_or_fault check on the
	operands and expand with the maskload/maskstore optab when one
	of the cmove operands may trap or fault.
	(noce_process_if_block): Allow a may-trap-or-fault dest for the
	"if (...) *x = a; else skip" scenario when the maskstore optab
	is available.
	* optabs.h (emit_mask_load_store): New declaration.
	* optabs.cc (emit_mask_load_store): New function to emit a
	conditional move with the maskload/maskstore optab.
---
gcc/ifcvt.cc | 110 ++++++++++++++++++++++++++++++++++++++++++--------
gcc/optabs.cc | 103 ++++++++++++++++++++++++++++++++++++++++++++++
gcc/optabs.h | 3 ++
3 files changed, 200 insertions(+), 16 deletions(-)
diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index cb5597bc171..51ac398aee1 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -778,6 +778,7 @@ static bool noce_try_store_flag_mask (struct noce_if_info *);
static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
rtx, rtx, rtx, rtx = NULL, rtx = NULL);
static bool noce_try_cmove (struct noce_if_info *);
+static bool can_use_mask_load_store (struct noce_if_info *);
static bool noce_try_cmove_arith (struct noce_if_info *);
static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
static bool noce_try_minmax (struct noce_if_info *);
@@ -2132,6 +2133,39 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
return true;
}
+/* Return TRUE if the backend supports a scalar maskload_optab
+   or maskstore_optab, which suppress memory faults when trying to
+   load or store a memory operand and the condition code evaluates
+   to false.
+   Currently the following forms
+       "if (test) *x = a; else skip;" --> mask_store
+       "if (test) x = *a; else x = b;" --> mask_load
+       "if (test) x = a; else x = *b;" --> mask_load
+   are supported.  */
+
+static bool
+can_use_mask_load_store (struct noce_if_info *if_info)
+{
+ rtx b = if_info->b;
+ rtx x = if_info->x;
+ rtx cond = if_info->cond;
+
+ if (MEM_P (x))
+ {
+ if (convert_optab_handler (maskstore_optab, GET_MODE (x),
+ GET_MODE (cond)) == CODE_FOR_nothing)
+ return false;
+
+ if (!rtx_equal_p (x, b) || !may_trap_or_fault_p (x))
+ return false;
+
+ return true;
+ }
+ else
+ return convert_optab_handler (maskload_optab, GET_MODE (x),
+ GET_MODE (cond)) != CODE_FOR_nothing;
+}
+
/* Try more complex cases involving conditional_move. */
static bool
@@ -2151,6 +2185,9 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
enum rtx_code code;
rtx cond = if_info->cond;
rtx_insn *ifcvt_seq;
+ bool a_may_trap_or_fault = may_trap_or_fault_p (a);
+ bool b_may_trap_or_fault = may_trap_or_fault_p (b);
+ bool use_mask_load_store = false;
/* A conditional move from two memory sources is equivalent to a
conditional on their addresses followed by a load. Don't do this
@@ -2167,11 +2204,22 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
x = gen_reg_rtx (address_mode);
is_mem = true;
}
-
- /* ??? We could handle this if we knew that a load from A or B could
- not trap or fault. This is also true if we've already loaded
- from the address along the path from ENTRY. */
- else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
+  /* We cannot handle the case where both a and b may trap or
+     fault.  */
+ else if (a_may_trap_or_fault && b_may_trap_or_fault)
+ return false;
+  /* A scalar maskload_optab/maskstore_optab implies conditional
+     faulting: if the condition mask evaluates to false, all memory
+     faults are suppressed when a memory operand is loaded or stored.
+     So if a scalar maskload or maskstore is available, we can do the
+     conversion when one of a/b may trap or fault.  */
+ else if (((MEM_P (a) && a_may_trap_or_fault
+ && !b_may_trap_or_fault)
+ || (MEM_P (b) && b_may_trap_or_fault
+ && !a_may_trap_or_fault))
+ && can_use_mask_load_store (if_info))
+ use_mask_load_store = true;
+ else if (a_may_trap_or_fault || b_may_trap_or_fault)
return false;
/* if (test) x = a + b; else x = c - d;
@@ -2212,6 +2260,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
std::swap (insn_a, insn_b);
std::swap (a_simple, b_simple);
std::swap (then_bb, else_bb);
+ std::swap (a_may_trap_or_fault, b_may_trap_or_fault);
}
}
@@ -2247,9 +2296,14 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
/* If either operand is complex, load it into a register first.
The best way to do this is to copy the original insn. In this
way we preserve any clobbers etc that the insn may have had.
- This is of course not possible in the IS_MEM case. */
+ This is of course not possible in the IS_MEM case.
+     For an operand that may trap or fault, we must not hoist its load
+     or store; otherwise the memory fault could not be suppressed and
+     we would end up with a normal arithmetic insn instead of a
+     conditionally faulting movcc.  */
- if (! general_operand (a, GET_MODE (a)) || tmp_a)
+ if (!a_may_trap_or_fault
+ && (! general_operand (a, GET_MODE (a)) || tmp_a))
{
if (is_mem)
@@ -2278,7 +2332,8 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
}
}
- if (! general_operand (b, GET_MODE (b)) || tmp_b)
+ if (!b_may_trap_or_fault
+ && (! general_operand (b, GET_MODE (b)) || tmp_b))
{
if (is_mem)
{
@@ -2356,8 +2411,12 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
else
goto end_seq_and_fail;
- target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP (cond, 1),
- a, b);
+ if (use_mask_load_store)
+ target = emit_mask_load_store (x, code, XEXP (cond, 0),
+ XEXP (cond, 1), a, b);
+ else
+ target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0),
+ XEXP (cond, 1), a, b);
if (! target)
goto end_seq_and_fail;
@@ -4210,12 +4269,31 @@ noce_process_if_block (struct noce_if_info *if_info)
}
if (!set_b && MEM_P (orig_x))
- /* We want to avoid store speculation to avoid cases like
- if (pthread_mutex_trylock(mutex))
- ++global_variable;
- Rather than go to much effort here, we rely on the SSA optimizers,
- which do a good enough job these days. */
- return false;
+ {
+      /* When the target supports a scalar maskstore, e.g. x86 cfcmov,
+         we can do a conditional memory store for "if (...) *x = a;
+         else skip" where x may trap or fault.  */
+ if ((convert_optab_handler (maskstore_optab, GET_MODE (orig_x),
+ GET_MODE (cond)) != CODE_FOR_nothing)
+ && HAVE_conditional_move
+ && may_trap_or_fault_p (orig_x)
+ && register_operand (a, GET_MODE (orig_x)))
+ {
+ x = orig_x;
+ if_info->x = x;
+ if (noce_try_cmove_arith (if_info))
+ goto success;
+ else
+ return false;
+ }
+ /* We want to avoid store speculation to avoid cases like
+ if (pthread_mutex_trylock(mutex))
+ ++global_variable;
+ Rather than go to much effort here, we rely on the SSA optimizers,
+ which do a good enough job these days. */
+ else
+ return false;
+ }
if (noce_try_move (if_info))
goto success;
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 36f2e6af8b5..f862f74fb5e 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -5215,6 +5215,109 @@ emit_conditional_move_1 (rtx target, rtx comparison,
return NULL_RTX;
}
+/* Emit a conditional move instruction using the maskload or maskstore
+   optab, which allows one of the move source or dest to be a
+   may_trap_or_fault memory.  The operands have the same meaning as for
+   emit_conditional_move, except that PRED can be a mask predicate.  */
+
+rtx
+emit_mask_load_store (rtx target, enum rtx_code code, rtx cmp_a,
+ rtx cmp_b, rtx vfalse, rtx vtrue, rtx pred)
+{
+ enum insn_code icode;
+
+ bool unsignedp = (code == LTU || code == GEU
+ || code == LEU || code == GTU);
+
+ bool maskstore_p = MEM_P (target);
+ bool restore_stack = false;
+ saved_pending_stack_adjust save;
+ rtx_insn *last = get_last_insn ();
+
+  /* If PRED doesn't exist, prepare a compare insn using CMP_A and
+     CMP_B as the predicate.  */
+ if (pred == NULL_RTX)
+ {
+ if (! general_operand (cmp_a, GET_MODE (cmp_a))
+ || ! general_operand (cmp_b, GET_MODE (cmp_b)))
+ return NULL_RTX;
+
+ if (swap_commutative_operands_p (cmp_a, cmp_b))
+ {
+ std::swap (cmp_a, cmp_b);
+ code = swap_condition (code);
+ }
+
+ /* get_condition will prefer to generate LT and GT even if the old
+ comparison was against zero, so undo that canonicalization here
+ since comparisons against zero are cheaper. */
+
+ if (code == LT && cmp_b == const1_rtx)
+ code = LE, cmp_b = const0_rtx;
+ else if (code == GT && cmp_b == constm1_rtx)
+ code = GE, cmp_b = const0_rtx;
+
+ code = unsignedp ? unsigned_condition (code) : code;
+ rtx comparison = simplify_gen_relational (code, VOIDmode,
+ VOIDmode, cmp_a, cmp_b);
+
+ if (!COMPARISON_P (comparison))
+ return NULL_RTX;
+
+ save_pending_stack_adjust (&save);
+ do_pending_stack_adjust ();
+
+ machine_mode cmode = VOIDmode;
+ prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1),
+ GET_CODE (comparison), NULL_RTX, unsignedp,
+ OPTAB_WIDEN, &comparison, &cmode);
+
+ if (!comparison)
+ restore_stack = true;
+
+ pred = comparison;
+ }
+
+ if (pred)
+ {
+ machine_mode op_mode = GET_MODE (target),
+ cond_mode = GET_MODE (pred);
+
+ if (maskstore_p)
+ icode = convert_optab_handler (maskstore_optab, op_mode,
+ cond_mode);
+ else
+ icode = convert_optab_handler (maskload_optab, op_mode,
+ cond_mode);
+
+ if (icode != CODE_FOR_nothing)
+ {
+ class expand_operand ops[4];
+ int opnum = 3;
+
+ create_output_operand (&ops[0], target, op_mode);
+ create_input_operand (&ops[1], vtrue, op_mode);
+ create_fixed_operand (&ops[2], pred);
+
+ if (!maskstore_p)
+ {
+ create_input_operand (&ops[3], vfalse, op_mode);
+ opnum = 4;
+ }
+
+ if (maybe_expand_insn (icode, opnum, ops))
+ return target;
+ }
+ }
+
+ if (restore_stack)
+ {
+ delete_insns_since (last);
+ restore_pending_stack_adjust (&save);
+ }
+
+ return NULL_RTX;
+}
/* Emit a conditional negate or bitwise complement using the
negcc or notcc optabs if available. Return NULL_RTX if such operations
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 23fa77be24e..6e211d9571a 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -293,6 +293,9 @@ extern void emit_indirect_jump (rtx);
rtx emit_conditional_move (rtx, rtx_comparison, rtx, rtx, machine_mode, int);
rtx emit_conditional_move (rtx, rtx, rtx, rtx, rtx, machine_mode);
+/* Emit a conditional move using the maskload or maskstore optab.  */
+rtx emit_mask_load_store (rtx, rtx_code, rtx, rtx, rtx, rtx, rtx = NULL);
+
/* Emit a conditional negate or bitwise complement operation. */
rtx emit_conditional_neg_or_complement (rtx, rtx_code, machine_mode, rtx,
rtx, rtx);
--
2.31.1