> -----Original Message-----
> From: Richard Biener <richard.guent...@gmail.com>
> Sent: Friday, June 14, 2024 2:13 PM
> To: Kong, Lingling <lingling.k...@intel.com>; Richard Sandiford
> <richard.sandif...@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao....@intel.com>; Uros
> Bizjak <ubiz...@gmail.com>
> Subject: Re: [PATCH 2/2] [APX CFCMOV] Support APX CFCMOV
>
> On Fri, Jun 14, 2024 at 3:39 AM Kong, Lingling <lingling.k...@intel.com>
> wrote:
> >
> > From: konglin1 <lingling.k...@intel.com>
> >
> >
> >
> > The APX CFCMOV feature implements conditional faulting, which means that
> > all
> >
> > memory faults are suppressed when the condition code evaluates to
> > false and
> >
> > the instruction loads or stores a memory operand. A conditional move can
> > now
> >
> > load or store a memory operand that may trap or fault.
> >
> >
> >
> > To enable CFCMOV, we add a target hook,
> > TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP,
> >
> > that the if-conversion pass uses to allow conversion to cmov.
> >
> >
> >
> > Bootstrapped & regtested on x86-64-pc-linux-gnu with binutils 2.42 branch.
> >
> > OK for trunk?
>
> How does if-conversion end up modifying the IL?
>
> I have the gut feeling that your hook changes semantics of RTL and you should
> instead have an optab for a "masked" load/store?
>
> Richard - do you already have plans how to represent the first-fault loads?
> (are there first-fault stores?)
Yes.
>
> Richard.
>
> >
> >
> > gcc/ChangeLog:
> >
> >
> >
> > * config/i386/i386-expand.cc (ix86_can_cfcmov_p): New
> > function that
> >
> > tests whether a cfcmov can be generated.
> >
> > (ix86_expand_int_movcc): Expand to cfcmov pattern if
> > ix86_can_cfcmov_p
> >
> > returns true.
> >
> > * config/i386/i386-opts.h (enum apx_features): Add
> > apx_cfcmov.
> >
> > * config/i386/i386.cc
> > (ix86_have_conditional_move_mem_notrap): New
> >
> > function to hook
> > TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP
> >
> > (TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP): Target hook
> define.
> >
> > (ix86_rtx_costs): Add UNSPEC_APX_CFCMOV cost;
> >
> > * config/i386/i386.h (TARGET_APX_CFCMOV): Define.
> >
> > * config/i386/i386.md (*cfcmov<mode>_1): New
> > define_insn to support
> >
> > cfcmov.
> >
> > (*cfcmov<mode>_2): Ditto.
> >
> > (UNSPEC_APX_CFCMOV): New unspec for cfcmov.
> >
> > * config/i386/i386.opt: Add enum value for cfcmov.
> >
> > * ifcvt.cc (noce_try_cmove_load_mem_notrap): Use target
> > hook to allow
> >
> > conversion to cfcmov for conditional load.
> >
> > (noce_try_cmove_store_mem_notrap): Convert to conditional
> > store.
> >
> > (noce_process_if_block): Ditto.
> >
> >
> >
> > gcc/testsuite/ChangeLog:
> >
> >
> >
> > * gcc.target/i386/apx-cfcmov-1.c: New test.
> >
> > * gcc.target/i386/apx-cfcmov-2.c: Ditto.
> >
> > ---
> >
> > gcc/config/i386/i386-expand.cc | 63 +++++
> >
> > gcc/config/i386/i386-opts.h | 4 +-
> >
> > gcc/config/i386/i386.cc | 33 ++-
> >
> > gcc/config/i386/i386.h | 1 +
> >
> > gcc/config/i386/i386.md | 53 +++-
> >
> > gcc/config/i386/i386.opt | 3 +
> >
> > gcc/config/i386/predicates.md | 7 +
> >
> > gcc/ifcvt.cc | 247 ++++++++++++++++++-
> >
> > gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c | 73 ++++++
> >
> > gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c | 40 +++
> >
> > 10 files changed, 511 insertions(+), 13 deletions(-)
> >
> > create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
> >
> > create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
> >
> >
> >
> > diff --git a/gcc/config/i386/i386-expand.cc
> > b/gcc/config/i386/i386-expand.cc
> >
> > index 312329e550b..c02a4bcbec3 100644
> >
> > --- a/gcc/config/i386/i386-expand.cc
> >
> > +++ b/gcc/config/i386/i386-expand.cc
> >
> > @@ -3336,6 +3336,30 @@ ix86_expand_int_addcc (rtx operands[])
> >
> > return true;
> >
> > }
> >
> >
> >
> > +/* Return TRUE if we could convert "if (test) x = a; else x = b;" to
> > +cfcmov,
> >
> > + especially when loading a or b, or storing x, may cause memory faults.
> > + */
> >
> > +bool
> >
> > +ix86_can_cfcmov_p (rtx x, rtx a, rtx b)
> >
> > +{
> >
> > + machine_mode mode = GET_MODE (x);
> >
> > + if (TARGET_APX_CFCMOV
> >
> > + && (mode == DImode || mode == SImode || mode == HImode))
> >
> > + {
> >
> > + /* C load (r m r), (r m C), (r r m). For r m m could use
> >
> > + two cfcmov. */
> >
> > + if (register_operand (x, mode)
> >
> > + && ((MEM_P (a) && register_operand (b, mode))
> >
> > + || (MEM_P (a) && b == const0_rtx)
> >
> > + || (register_operand (a, mode) && MEM_P (b))
> >
> > + || (MEM_P (a) && MEM_P (b))))
> >
> > + return true;
> >
> > + /* C store (m r 0). */
> >
> > + else if (MEM_P (x) && x == b && register_operand (a, mode))
> >
> > + return true;
> >
> > + }
> >
> > + return false;
> >
> > +}
> >
> > +
> >
> > bool
> >
> > ix86_expand_int_movcc (rtx operands[])
> >
> > {
> >
> > @@ -3366,6 +3390,45 @@ ix86_expand_int_movcc (rtx operands[])
> >
> >
> >
> > compare_code = GET_CODE (compare_op);
> >
> >
> >
> > + if (MEM_P (operands[0])
> >
> > + && !ix86_can_cfcmov_p (operands[0], op2, op3))
> >
> > + return false;
> >
> > +
> >
> > + if (may_trap_or_fault_p (op2) || may_trap_or_fault_p (op3))
> >
> > + {
> >
> > + if (ix86_can_cfcmov_p (operands[0], op2, op3))
> >
> > + {
> >
> > + if (may_trap_or_fault_p (op2))
> >
> > + op2 = gen_rtx_UNSPEC (mode, gen_rtvec (1,
> > + operands[2]),
> >
> > +
> > + UNSPEC_APX_CFCMOV);
> >
> > + if (may_trap_or_fault_p (op3))
> >
> > + op3 = gen_rtx_UNSPEC (mode, gen_rtvec (1,
> > + operands[3]),
> >
> > +
> > + UNSPEC_APX_CFCMOV);
> >
> > + emit_insn (compare_seq);
> >
> > +
> >
> > + if (may_trap_or_fault_p (op2) && may_trap_or_fault_p
> > + (op3))
> >
> > + {
> >
> > + emit_insn (gen_rtx_SET (operands[0],
> >
> > +
> > + gen_rtx_IF_THEN_ELSE (mode,
> >
> > +
> > + compare_op,
> >
> > +
> > + op2,
> >
> > +
> > + operands[0])));
> >
> > + emit_insn (gen_rtx_SET (operands[0],
> >
> > +
> > + gen_rtx_IF_THEN_ELSE (mode,
> >
> > +
> > + compare_op,
> >
> > +
> > + operands[0],
> >
> > +
> > + op3)));
> >
> > + }
> >
> > + else
> >
> > + emit_insn (gen_rtx_SET (operands[0],
> >
> > +
> > + gen_rtx_IF_THEN_ELSE (mode,
> >
> > +
> > + compare_op,
> >
> > +
> > + op2, op3)));
> >
> > + return true;
> >
> > + }
> >
> > + return false;
> >
> > + }
> >
> > +
> >
> > if ((op1 == const0_rtx && (code == GE || code == LT))
> >
> > || (op1 == constm1_rtx && (code == GT || code == LE)))
> >
> > sign_bit_compare_p = true;
> >
> > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> >
> > index c7ec0d9fd39..711519ffb53 100644
> >
> > --- a/gcc/config/i386/i386-opts.h
> >
> > +++ b/gcc/config/i386/i386-opts.h
> >
> > @@ -143,8 +143,10 @@ enum apx_features {
> >
> > apx_nf = 1 << 4,
> >
> > apx_ccmp = 1 << 5,
> >
> > apx_zu = 1 << 6,
> >
> > + apx_cfcmov = 1 << 7,
> >
> > apx_all = apx_egpr | apx_push2pop2 | apx_ndd
> >
> > - | apx_ppx | apx_nf | apx_ccmp | apx_zu,
> >
> > + | apx_ppx | apx_nf | apx_ccmp | apx_zu
> >
> > + | apx_cfcmov,
> >
> > };
> >
> >
> >
> > #endif
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> >
> > index 173db213d14..b14c0a3d9f2 100644
> >
> > --- a/gcc/config/i386/i386.cc
> >
> > +++ b/gcc/config/i386/i386.cc
> >
> > @@ -22349,10 +22349,18 @@ ix86_rtx_costs (rtx x, machine_mode mode,
> > int outer_code_i, int opno,
> >
> > *total = COSTS_N_INSNS (1);
> >
> > if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x,
> > 0)))
> >
> > *total += rtx_cost (XEXP (x, 0), mode, code, 0,
> > speed);
> >
> > - if (!REG_P (XEXP (x, 1)))
> >
> > - *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
> >
> > - if (!REG_P (XEXP (x, 2)))
> >
> > - *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
> >
> > + rtx op1, op2;
> >
> > + op1 = XEXP (x, 1);
> >
> > + op2 = XEXP (x, 2);
> >
> > + /* Handle UNSPEC_APX_CFCMOV for cfcmov. */
> >
> > + if (GET_CODE (op1) == UNSPEC && XINT (op1, 1) ==
> > + UNSPEC_APX_CFCMOV)
> >
> > + op1 = XVECEXP (op1, 0, 0);
> >
> > + if (GET_CODE (op2) == UNSPEC && XINT (op2, 1) ==
> > + UNSPEC_APX_CFCMOV)
> >
> > + op2 = XVECEXP (op2, 0, 0);
> >
> > + if (!REG_P (op1))
> >
> > + *total += rtx_cost (op1, mode, code, 1, speed);
> >
> > + if (!REG_P (op2))
> >
> > + *total += rtx_cost (op2, mode, code, 2, speed);
> >
> > return true;
> >
> > }
> >
> > return false;
> >
> > @@ -24998,6 +25006,19 @@ ix86_noce_conversion_profitable_p (rtx_insn
> > *seq, struct noce_if_info *if_info)
> >
> > return default_noce_conversion_profitable_p (seq, if_info);
> >
> > }
> >
> >
> >
> > +
> >
> > +/* Implement targetm.have_conditional_move_mem_notrap hook. */
> >
> > +static bool
> >
> > +ix86_have_conditional_move_mem_notrap (rtx x)
> >
> > +{
> >
> > + machine_mode mode = GET_MODE (x);
> >
> > + if (TARGET_APX_CFCMOV
> >
> > + && (mode == DImode || mode == SImode || mode == HImode)
> >
> > + && MEM_P (x))
> >
> > + return true;
> >
> > + return false;
> >
> > +}
> >
> > +
> >
> > /* x86-specific vector costs. */
> >
> > class ix86_vector_costs : public vector_costs
> >
> > {
> >
> > @@ -26975,6 +26996,10 @@ ix86_libgcc_floating_mode_supported_p
> >
> > #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
> >
> > #define TARGET_NOCE_CONVERSION_PROFITABLE_P
> > ix86_noce_conversion_profitable_p
> >
> >
> >
> > +#undef TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP
> >
> > +#define TARGET_HAVE_CONDITIONAL_MOVE_MEM_NOTRAP \
> >
> > + ix86_have_conditional_move_mem_notrap
> >
> > +
> >
> > #undef TARGET_HARD_REGNO_NREGS
> >
> > #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
> >
> > #undef TARGET_HARD_REGNO_MODE_OK
> >
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> >
> > index dc1a1f44320..6a20fa678c8 100644
> >
> > --- a/gcc/config/i386/i386.h
> >
> > +++ b/gcc/config/i386/i386.h
> >
> > @@ -58,6 +58,7 @@ see the files COPYING3 and COPYING.RUNTIME
> > respectively. If not, see
> >
> > #define TARGET_APX_NF (ix86_apx_features & apx_nf)
> >
> > #define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp)
> >
> > #define TARGET_APX_ZU (ix86_apx_features & apx_zu)
> >
> > +#define TARGET_APX_CFCMOV (ix86_apx_features & apx_cfcmov)
> >
> >
> >
> > #include "config/vxworks-dummy.h"
> >
> >
> >
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> >
> > index fd48e764469..57448c07828 100644
> >
> > --- a/gcc/config/i386/i386.md
> >
> > +++ b/gcc/config/i386/i386.md
> >
> > @@ -221,6 +221,9 @@
> >
> > ;; For APX CCMP support
> >
> > ;; DFV = default flag value
> >
> > UNSPEC_APX_DFV
> >
> > +
> >
> > + ;; For APX CFCMOV support
> >
> > + UNSPEC_APX_CFCMOV
> >
> > ])
> >
> >
> >
> > (define_c_enum "unspecv" [
> >
> > @@ -579,7 +582,7 @@
> >
> >
> > noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
> >
> >
> > avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
> >
> >
> > avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
> >
> > - vaes_avx512vl,noapx_nf"
> >
> > + vaes_avx512vl,noapx_nf,apx_cfcmov"
> >
> > (const_string "base"))
> >
> >
> >
> > ;; The (bounding maximum) length of an instruction immediate.
> >
> > @@ -986,6 +989,7 @@
> >
> > (eq_attr "mmx_isa" "avx")
> >
> > (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
> >
> > (eq_attr "isa" "noapx_nf") (symbol_ref
> > "!TARGET_APX_NF")
> >
> > + (eq_attr "isa" "apx_cfcmov") (symbol_ref
> > + "TARGET_APX_CFCMOV")
> >
> > ]
> >
> > (const_int 1)))
> >
> >
> >
> > @@ -24995,7 +24999,7 @@
> >
> > ;; Conditional move instructions.
> >
> >
> >
> > (define_expand "mov<mode>cc"
> >
> > - [(set (match_operand:SWIM 0 "register_operand")
> >
> > + [(set (match_operand:SWIM 0 "register_or_cfc_mem_operand")
> >
> > (if_then_else:SWIM (match_operand 1
> > "comparison_operator")
> >
> > (match_operand:SWIM 2
> > "<general_operand>")
> >
> > (match_operand:SWIM 3
> > "<general_operand>")))]
> >
> > @@ -25103,19 +25107,54 @@
> >
> > (set (match_dup 0)
> >
> > (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int
> > 0))))])
> >
> >
> >
> > +(define_insn "*cfcmov<mode>_1"
> >
> > + [(set (match_operand:SWI248 0 "register_operand" "=r,r")
> >
> > + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
> >
> > + [(reg FLAGS_REG) (const_int 0)])
> >
> > + (unspec:SWI248
> >
> > + [(match_operand:SWI248 2 "memory_operand" "m,m")]
> >
> > + UNSPEC_APX_CFCMOV)
> >
> > + (match_operand:SWI248 3 "reg_or_0_operand" "C,r")))]
> >
> > + "TARGET_CMOVE && TARGET_APX_CFCMOV"
> >
> > + "@
> >
> > + cfcmov%O2%C1\t{%2, %0|%0, %2}
> >
> > + cfcmov%O2%C1\t{%2, %3, %0|%0, %3, %2}"
> >
> > + [(set_attr "isa" "*,apx_ndd")
> >
> > + (set_attr "type" "icmov")
> >
> > + (set_attr "prefix" "evex")
> >
> > + (set_attr "mode" "<MODE>")])
> >
> > +
> >
> > +(define_insn "*cfcmov<mode>_2"
> >
> > + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,m")
> >
> > + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
> >
> > + [(reg FLAGS_REG) (const_int 0)])
> >
> > + (match_operand:SWI248 2 "register_operand" "r,r")
> >
> > + (unspec:SWI248
> >
> > + [(match_operand:SWI248 3 "memory_operand" "m,0")]
> >
> > + UNSPEC_APX_CFCMOV)))]
> >
> > + "TARGET_CMOVE && TARGET_APX_CFCMOV"
> >
> > + "@
> >
> > + cfcmov%O2%c1\t{%3, %2, %0|%0, %2, %3}
> >
> > + cfcmov%O2%C1\t{%2, %0|%0, %2}"
> >
> > + [(set_attr "isa" "apx_ndd,*")
> >
> > + (set_attr "type" "icmov")
> >
> > + (set_attr "prefix" "evex")
> >
> > + (set_attr "mode" "<MODE>")])
> >
> > +
> >
> > (define_insn "*mov<mode>cc_noc"
> >
> > - [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
> >
> > + [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r,r")
> >
> > (if_then_else:SWI248 (match_operator 1
> "ix86_comparison_operator"
> >
> > [(reg FLAGS_REG)
> > (const_int 0)])
> >
> > - (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r")
> >
> > - (match_operand:SWI248 3 "nonimmediate_operand"
> "0,rm,r,rm")))]
> >
> > + (match_operand:SWI248 2 "nonimmediate_operand"
> > + "rm,0,rm,r,r")
> >
> > + (match_operand:SWI248 3 "nonimm_or_0_operand"
> > + "0,rm,r,rm,C")))]
> >
> > "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
> >
> > "@
> >
> > cmov%O2%C1\t{%2, %0|%0, %2}
> >
> > cmov%O2%c1\t{%3, %0|%0, %3}
> >
> > cmov%O2%C1\t{%2, %3, %0|%0, %3, %2}
> >
> > - cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}"
> >
> > - [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
> >
> > + cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}
> >
> > + cfcmov%O2%C1\t{%2, %0|%0, %2}"
> >
> > + [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_cfcmov")
> >
> > (set_attr "type" "icmov")
> >
> > (set_attr "mode" "<MODE>")])
> >
> >
> >
> > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> >
> > index 353fffb2343..7d63d9abd95 100644
> >
> > --- a/gcc/config/i386/i386.opt
> >
> > +++ b/gcc/config/i386/i386.opt
> >
> > @@ -1345,6 +1345,9 @@ Enum(apx_features) String(ccmp)
> Value(apx_ccmp)
> > Set(7)
> >
> > EnumValue
> >
> > Enum(apx_features) String(zu) Value(apx_zu) Set(8)
> >
> >
> >
> > +EnumValue
> >
> > +Enum(apx_features) String(cfcmov) Value(apx_cfcmov) Set(9)
> >
> > +
> >
> > EnumValue
> >
> > Enum(apx_features) String(all) Value(apx_all) Set(1)
> >
> >
> >
> > diff --git a/gcc/config/i386/predicates.md
> > b/gcc/config/i386/predicates.md
> >
> > index 7afe3100cb7..d562e10ab41 100644
> >
> > --- a/gcc/config/i386/predicates.md
> >
> > +++ b/gcc/config/i386/predicates.md
> >
> > @@ -2322,3 +2322,10 @@
> >
> >
> >
> > return true;
> >
> > })
> >
> > +
> >
> > +;; Return true if OP is a register operand, or a memory operand when
> >
> > +;; TARGET_APX_CFCMOV is enabled.
> >
> > +(define_predicate "register_or_cfc_mem_operand"
> >
> > + (ior (match_operand 0 "register_operand")
> >
> > + (and (match_code "mem")
> >
> > + (match_test "TARGET_APX_CFCMOV"))))
> >
> > diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> >
> > index 58ed42673e5..6e3e48af810 100644
> >
> > --- a/gcc/ifcvt.cc
> >
> > +++ b/gcc/ifcvt.cc
> >
> > @@ -783,6 +783,8 @@ static rtx noce_emit_cmove (struct noce_if_info *,
> > rtx, enum rtx_code, rtx,
> >
> > rtx, rtx, rtx, rtx =
> > NULL, rtx = NULL);
> >
> > static bool noce_try_cmove (struct noce_if_info *);
> >
> > static bool noce_try_cmove_arith (struct noce_if_info *);
> >
> > +static bool noce_try_cmove_load_mem_notrap (struct noce_if_info *);
> >
> > +static bool noce_try_cmove_store_mem_notrap (struct noce_if_info *,
> > +rtx *, rtx);
> >
> > static rtx noce_get_alt_condition (struct noce_if_info *, rtx,
> > rtx_insn **);
> >
> > static bool noce_try_minmax (struct noce_if_info *);
> >
> > static bool noce_try_abs (struct noce_if_info *);
> >
> > @@ -2401,6 +2403,237 @@ noce_try_cmove_arith (struct noce_if_info
> > *if_info)
> >
> > return false;
> >
> > }
> >
> >
> >
> > +/* When the target supports suppressing memory faults, try more complex cases
> > +involving
> >
> > + a conditional move whose source or dest may trap or fault. */
> >
> > +
> >
> > +static bool
> >
> > +noce_try_cmove_load_mem_notrap (struct noce_if_info *if_info)
> >
> > +{
> >
> > + rtx a = if_info->a;
> >
> > + rtx b = if_info->b;
> >
> > + rtx x = if_info->x;
> >
> > +
> >
> > + if (MEM_P (x))
> >
> > + return false;
> >
> > + /* Just handle a conditional move from one trap MEM + other
> > + non_trap,
> >
> > + non mem cases. */
> >
> > + if (!(MEM_P (a) ^ MEM_P (b)))
> >
> > + return false;
> >
> > + bool a_trap = may_trap_or_fault_p (a);
> >
> > + bool b_trap = may_trap_or_fault_p (b);
> >
> > +
> >
> > + if (!(a_trap ^ b_trap))
> >
> > + return false;
> >
> > + if (a_trap && (!MEM_P (a) ||
> > + !targetm.have_conditional_move_mem_notrap (a)))
> >
> > + return false;
> >
> > + if (b_trap && (!MEM_P (b) ||
> > + !targetm.have_conditional_move_mem_notrap (b)))
> >
> > + return false;
> >
> > +
> >
> > + rtx orig_b;
> >
> > + rtx_insn *insn_a, *insn_b;
> >
> > + bool a_simple = if_info->then_simple;
> >
> > + bool b_simple = if_info->else_simple;
> >
> > + basic_block then_bb = if_info->then_bb;
> >
> > + basic_block else_bb = if_info->else_bb;
> >
> > + rtx target;
> >
> > + enum rtx_code code;
> >
> > + rtx cond = if_info->cond;
> >
> > + rtx_insn *ifcvt_seq;
> >
> > +
> >
> > + /* if (test) x = *a; else x = c - d;
> >
> > + => x = c - d;
> >
> > + if (test)
> >
> > + x = *a;
> >
> > + */
> >
> > +
> >
> > + code = GET_CODE (cond);
> >
> > + insn_a = if_info->insn_a;
> >
> > + insn_b = if_info->insn_b;
> >
> > +
> >
> > + machine_mode x_mode = GET_MODE (x);
> >
> > +
> >
> > + if (!can_conditionally_move_p (x_mode))
> >
> > + return false;
> >
> > +
> >
> > + /* Because we only handle one trap MEM + other non_trap, non mem
> > + cases,
> >
> > + just move one trap MEM always in then_bb. */
> >
> > + if (noce_reversed_cond_code (if_info) != UNKNOWN)
> >
> > + {
> >
> > + bool reversep = false;
> >
> > + if (b_trap)
> >
> > + reversep = true;
> >
> > +
> >
> > + if (reversep)
> >
> > + {
> >
> > + if (if_info->rev_cond)
> >
> > + {
> >
> > + cond = if_info->rev_cond;
> >
> > + code = GET_CODE (cond);
> >
> > + }
> >
> > + else
> >
> > + code = reversed_comparison_code (cond,
> > + if_info->jump);
> >
> > + std::swap (a, b);
> >
> > + std::swap (insn_a, insn_b);
> >
> > + std::swap (a_simple, b_simple);
> >
> > + std::swap (then_bb, else_bb);
> >
> > + }
> >
> > + }
> >
> > +
> >
> > + if (then_bb && else_bb
> >
> > + && (!bbs_ok_for_cmove_arith (then_bb, else_bb,
> > + if_info->orig_x)
> >
> > + || !bbs_ok_for_cmove_arith (else_bb, then_bb,
> > + if_info->orig_x)))
> >
> > + return false;
> >
> > +
> >
> > + start_sequence ();
> >
> > +
> >
> > + /* If one of the blocks is empty then the corresponding B or A
> > + value
> >
> > + came from the test block. The non-empty complex block that we
> > + will
> >
> > + emit might clobber the register used by B or A, so move it to a
> > + pseudo
> >
> > + first. */
> >
> > +
> >
> > + rtx tmp_b = NULL_RTX;
> >
> > +
> >
> > + /* Don't move trap mem to a pseudo. */
> >
> > + if (!may_trap_or_fault_p (b) && (b_simple || !else_bb))
> >
> > + tmp_b = gen_reg_rtx (x_mode);
> >
> > +
> >
> > + orig_b = b;
> >
> > +
> >
> > + rtx emit_a = NULL_RTX;
> >
> > + rtx emit_b = NULL_RTX;
> >
> > + rtx_insn *tmp_insn = NULL;
> >
> > + bool modified_in_a = false;
> >
> > + bool modified_in_b = false;
> >
> > + /* If either operand is complex, load it into a register first.
> >
> > + The best way to do this is to copy the original insn. In this
> >
> > + way we preserve any clobbers etc that the insn may have had.
> >
> > + This is of course not possible in the IS_MEM case. */
> >
> > +
> >
> > + if (! general_operand (b, GET_MODE (b)) || tmp_b)
> >
> > + {
> >
> > + if (insn_b)
> >
> > + {
> >
> > + b = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));
> >
> > + rtx_insn *copy_of_b = as_a <rtx_insn *> (copy_rtx
> > + (insn_b));
> >
> > + rtx set = single_set (copy_of_b);
> >
> > +
> >
> > + SET_DEST (set) = b;
> >
> > + emit_b = PATTERN (copy_of_b);
> >
> > + }
> >
> > + else
> >
> > + {
> >
> > + rtx tmp_reg = tmp_b ? tmp_b : gen_reg_rtx
> > + (GET_MODE (b));
> >
> > + emit_b = gen_rtx_SET (tmp_reg, b);
> >
> > + b = tmp_reg;
> >
> > + }
> >
> > + }
> >
> > +
> >
> > + if (tmp_b && then_bb)
> >
> > + {
> >
> > + FOR_BB_INSNS (then_bb, tmp_insn)
> >
> > + /* Don't check inside insn_a. We will have changed it
> > + to emit_a
> >
> > + with a destination that doesn't conflict. */
> >
> > + if (!(insn_a && tmp_insn == insn_a)
> >
> > + && modified_in_p (orig_b, tmp_insn))
> >
> > + {
> >
> > + modified_in_a = true;
> >
> > + break;
> >
> > + }
> >
> > +
> >
> > + }
> >
> > +
> >
> > + modified_in_b = emit_b != NULL_RTX && modified_in_p (a, emit_b);
> >
> > + /* If insn to set up A clobbers any registers B depends on, try to
> >
> > + swap insn that sets up A with the one that sets up B. If even
> >
> > + that doesn't help, punt. */
> >
> > + if (modified_in_a && !modified_in_b)
> >
> > + {
> >
> > + if (!noce_emit_bb (emit_b, else_bb, b_simple))
> >
> > + goto end_seq_and_fail;
> >
> > +
> >
> > + if (!noce_emit_bb (emit_a, then_bb, a_simple))
> >
> > + goto end_seq_and_fail;
> >
> > + }
> >
> > + else if (!modified_in_a)
> >
> > + {
> >
> > + if (!noce_emit_bb (emit_b, else_bb, b_simple))
> >
> > + goto end_seq_and_fail;
> >
> > +
> >
> > + if (!noce_emit_bb (emit_a, then_bb, a_simple))
> >
> > + goto end_seq_and_fail;
> >
> > + }
> >
> > + else
> >
> > + goto end_seq_and_fail;
> >
> > +
> >
> > + target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0), XEXP
> > + (cond, 1),
> >
> > + a, b);
> >
> > +
> >
> > + if (! target)
> >
> > + goto end_seq_and_fail;
> >
> > +
> >
> > + if (target != x)
> >
> > + noce_emit_move_insn (x, target);
> >
> > +
> >
> > + ifcvt_seq = end_ifcvt_sequence (if_info);
> >
> > + if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq,
> > + if_info))
> >
> > + return false;
> >
> > +
> >
> > + emit_insn_before_setloc (ifcvt_seq, if_info->jump,
> >
> > + INSN_LOCATION
> > + (if_info->insn_a));
> >
> > + if_info->transform_name = "noce_try_cmove_load_mem_notrap";
> >
> > + return true;
> >
> > +
> >
> > + end_seq_and_fail:
> >
> > + end_sequence ();
> >
> > + return false;
> >
> > +}
> >
> > +
> >
> > +static bool
> >
> > +noce_try_cmove_store_mem_notrap (struct noce_if_info *if_info, rtx
> > +*x_ptr, rtx orig_x)
> >
> > +{
> >
> > + rtx a = if_info->a;
> >
> > + rtx b = if_info->b;
> >
> > + rtx x = orig_x;
> >
> > + machine_mode x_mode = GET_MODE (x);
> >
> > +
> >
> > + if (!MEM_P (x) || !rtx_equal_p (x, b))
> >
> > + return false;
> >
> > + if (!may_trap_or_fault_p (x) ||
> > + !targetm.have_conditional_move_mem_notrap (x))
> >
> > + return false;
> >
> > + if (!if_info->then_simple || !register_operand (a, x_mode))
> >
> > + return false;
> >
> > +
> >
> > + rtx cond = if_info->cond;
> >
> > + enum rtx_code code = GET_CODE (cond);
> >
> > + rtx_insn *ifcvt_seq;
> >
> > +
> >
> > + start_sequence ();
> >
> > +
> >
> > + rtx target = noce_emit_cmove (if_info, x, code, XEXP (cond, 0),
> > + XEXP (cond, 1),
> >
> > + a, b);
> >
> > +
> >
> > + if (! target)
> >
> > + goto end_seq_and_fail;
> >
> > +
> >
> > + if (target != x)
> >
> > + noce_emit_move_insn (x, target);
> >
> > +
> >
> > + ifcvt_seq = end_ifcvt_sequence (if_info);
> >
> > + if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq,
> > + if_info))
> >
> > + return false;
> >
> > +
> >
> > + emit_insn_before_setloc (ifcvt_seq, if_info->jump,
> >
> > + INSN_LOCATION
> > + (if_info->insn_a));
> >
> > + if_info->transform_name = "noce_try_cmove_load_mem_notrap";
> >
> > + if_info->x = orig_x;
> >
> > + *x_ptr = orig_x;
> >
> > + return true;
> >
> > +
> >
> > + end_seq_and_fail:
> >
> > + end_sequence ();
> >
> > + return false;
> >
> > +}
> >
> > +
> >
> > /* For most cases, the simplified condition we found is the best
> >
> > choice, but this is not the case for the min/max/abs transforms.
> >
> > For these we wish to know that it is A or B in the condition. */
> >
> > @@ -4121,12 +4354,21 @@ noce_process_if_block (struct noce_if_info
> > *if_info)
> >
> > }
> >
> >
> >
> > if (!set_b && MEM_P (orig_x))
> >
> > + {
> >
> > + /* Conditional_move_suppress_fault for condition mem store
> > + would not
> >
> > + move any arithmetic calculations. */
> >
> > + if (targetm.have_conditional_move_mem_notrap (orig_x)
> >
> > + && HAVE_conditional_move
> >
> > + && noce_try_cmove_store_mem_notrap (if_info, &x,
> > + orig_x))
> >
> > + goto success;
> >
> > + else
> >
> > /* We want to avoid store speculation to avoid cases like
> >
> > if (pthread_mutex_trylock(mutex))
> >
> > ++global_variable;
> >
> > Rather than go to much effort here, we rely on the SSA
> > optimizers,
> >
> > which do a good enough job these days. */
> >
> > - return false;
> >
> > + return false;
> >
> > + }
> >
> >
> >
> > if (noce_try_move (if_info))
> >
> > goto success;
> >
> > @@ -4160,6 +4402,9 @@ noce_process_if_block (struct noce_if_info
> > *if_info)
> >
> > if (HAVE_conditional_move
> >
> > && noce_try_cmove_arith (if_info))
> >
> > goto success;
> >
> > + if (HAVE_conditional_move
> >
> > + && noce_try_cmove_load_mem_notrap (if_info))
> >
> > + goto success;
> >
> > if (noce_try_sign_mask (if_info))
> >
> > goto success;
> >
> > }
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
> > b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
> >
> > new file mode 100644
> >
> > index 00000000000..4a1fb91b24c
> >
> > --- /dev/null
> >
> > +++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
> >
> > @@ -0,0 +1,73 @@
> >
> > +/* { dg-do compile { target { ! ia32 } } } */
> >
> > +/* { dg-options "-O3 -mapxf" } */
> >
> > +
> >
> > +/* { dg-final { scan-assembler-times "cfcmovne" 1 } } */
> >
> > +/* { dg-final { scan-assembler-times "cfcmovg" 2} } */
> >
> > +/* { dg-final { scan-assembler-times "cfcmove" 1 } } */
> >
> > +/* { dg-final { scan-assembler-times "cfcmovl" 2 } } */
> >
> > +/* { dg-final { scan-assembler-times "cfcmovle" 1 } } */
> >
> > +
> >
> > +__attribute__((noinline, noclone, target("apxf")))
> >
> > +int cfc_store (int a, int b, int c, int d, int *arr)
> >
> > +{
> >
> > + if (a != b)
> >
> > + *arr = c;
> >
> > + return d;
> >
> > +
> >
> > +}
> >
> > +
> >
> > +__attribute__((noinline, noclone, target("apxf")))
> >
> > +int cfc_load_ndd (int a, int b, int c, int *p)
> >
> > +{
> >
> > + if (a > b)
> >
> > + return *p;
> >
> > + return c;
> >
> > +}
> >
> > +
> >
> > +__attribute__((noinline, noclone, target("apxf")))
> >
> > +int cfc_load_2_trap (int a, int b, int *c, int *p)
> >
> > +{
> >
> > + if (a > b)
> >
> > + return *p;
> >
> > + return *c;
> >
> > +}
> >
> > +
> >
> > +__attribute__((noinline, noclone, target("apxf")))
> >
> > +int cfc_load_zero (int a, int b, int c)
> >
> > +{
> >
> > + int sum = 0;
> >
> > + if (a == b)
> >
> > + return c;
> >
> > + return sum;
> >
> > +}
> >
> > +
> >
> > +__attribute__((noinline, noclone, target("apxf")))
> >
> > +int cfc_load_mem (int a, int b, int *p)
> >
> > +{
> >
> > + int sum = 0;
> >
> > + if (a < b )
> >
> > + sum = *p;
> >
> > + return sum;
> >
> > +}
> >
> > +
> >
> > +__attribute__((noinline, noclone, target("apxf")))
> >
> > +int cfc_load_arith_1 (int a, int b, int c, int *p)
> >
> > +{
> >
> > + int sum = 0;
> >
> > + if (a > b)
> >
> > + sum = *p;
> >
> > + else
> >
> > + sum = a + c;
> >
> > + return sum + 1;
> >
> > +}
> >
> > +
> >
> > +__attribute__((noinline, noclone, target("apxf")))
> >
> > +int cfc_load_arith_2 (int a, int b, int c, int *p)
> >
> > +{
> >
> > + int sum = 0;
> >
> > + if (a > b)
> >
> > + sum = a + c;
> >
> > + else
> >
> > + sum = *p;
> >
> > + return sum + 1;
> >
> > +}
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
> > b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
> >
> > new file mode 100644
> >
> > index 00000000000..2b1660f64fa
> >
> > --- /dev/null
> >
> > +++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
> >
> > @@ -0,0 +1,40 @@
> >
> > +/* { dg-do run { target { ! ia32 } } } */
> >
> > +/* { dg-require-effective-target apxf } */
> >
> > +/* { dg-options "-mapxf -march=x86-64 -O3" } */
> >
> > +
> >
> > +#include "apx-cfcmov-1.c"
> >
> > +
> >
> > +extern void abort (void);
> >
> > +
> >
> > +int main ()
> >
> > +{
> >
> > + if (!__builtin_cpu_supports ("apxf"))
> >
> > + return 0;
> >
> > +
> >
> > + int arr = 6;
> >
> > + int arr1 = 5;
> >
> > + int res = cfc_store (1, 2, 3, 4, &arr);
> >
> > + if (arr != 3 && res != 4)
> >
> > + abort ();
> >
> > + res = cfc_load_ndd (2, 1, 2, &arr);
> >
> > + if (res != 3)
> >
> > + abort ();
> >
> > + res = cfc_load_2_trap (1, 2, &arr1, &arr);
> >
> > + if (res != 5)
> >
> > + abort ();
> >
> > + res = cfc_load_zero (1, 2, 3);
> >
> > + res = cfc_load_zero (1, 2, 3);
> >
> > + if (res != 0)
> >
> > + abort ();
> >
> > + res = cfc_load_mem (2, 1, &arr);
> >
> > + if (res != 0)
> >
> > + abort ();
> >
> > + res = cfc_load_arith_1 (1, 2, 3, &arr);
> >
> > + if (res != 5)
> >
> > + abort();
> >
> > + res = cfc_load_arith_2 (2, 1, 3,&arr);
> >
> > + if (res != 6)
> >
> > + abort();
> >
> > + return 0;
> >
> > +}
> >
> > +
> >
> > --
> >
> > 2.31.1
> >
> >