gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_expand_int_cfmovcc): New function
to expand cfcmov patterns.
* config/i386/i386-opts.h (enum apx_features): Add apx_cfcmov.
* config/i386/i386-protos.h (ix86_expand_int_cfmovcc): Declare.
* config/i386/i386.cc (ix86_rtx_costs): Add UNSPEC_APX_CFCMOV
cost.
* config/i386/i386.h (TARGET_APX_CFCMOV): Define.
* config/i386/i386.md (maskload<mode><mode>): New define_expand.
(maskstore<mode><mode>): Ditto.
(*cfmov<mode>cc): New define_insn.
(*cfmov<mode>cc_2): Ditto.
(*cfmov<mode>ccz): Ditto.
(UNSPEC_APX_CFCMOV): New unspec for cfcmov.
* config/i386/i386.opt: Add enum value for cfcmov.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-cfcmov-1.c: New test.
* gcc.target/i386/apx-cfcmov-2.c: Ditto.
---
gcc/config/i386/i386-expand.cc | 47 +++++++++++++
gcc/config/i386/i386-opts.h | 4 +-
gcc/config/i386/i386-protos.h | 1 +
gcc/config/i386/i386.cc | 16 +++--
gcc/config/i386/i386.h | 1 +
gcc/config/i386/i386.md | 74 +++++++++++++++++++-
gcc/config/i386/i386.opt | 3 +
gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c | 73 +++++++++++++++++++
gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c | 40 +++++++++++
9 files changed, 253 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 36011cc6b35..c956bd96edb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3529,6 +3529,53 @@ ix86_expand_int_addcc (rtx operands[])
return true;
}
+/* Expand a conditional faulting move (APX CFCMOV).  OPERANDS[0] is the
+   destination, OPERANDS[1] the comparison, and OPERANDS[2] / OPERANDS[3]
+   are the values selected when the comparison is true / false.  At least
+   one of the two sources must be able to trap or fault; every such source
+   is wrapped in UNSPEC_APX_CFCMOV, which is the operand shape used by the
+   cfcmov insn patterns.  The compare and the conditional move(s) are
+   emitted into the current sequence.  */
+void
+ix86_expand_int_cfmovcc (rtx operands[])
+{
+  machine_mode mode = GET_MODE (operands[0]);
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx op2 = operands[2];
+  rtx op3 = operands[3];
+
+  /* Classify the sources once, while they are still the caller's rtxes,
+     rather than re-querying them after they have been wrapped.  */
+  bool op2_traps = may_trap_or_fault_p (op2);
+  bool op3_traps = may_trap_or_fault_p (op3);
+  gcc_assert (op2_traps || op3_traps);
+
+  /* For conditional store only handle "if (test) *x = a; else skip;".
+     Compare structurally: an equal MEM need not be the same rtx object.  */
+  if (MEM_P (operands[0]))
+    gcc_assert (rtx_equal_p (operands[0], op3));
+
+  /* Build the flag-setting compare in a detached sequence so that it can
+     be emitted right before the conditional move(s).  */
+  start_sequence ();
+  rtx compare_op = ix86_expand_compare (code, XEXP (operands[1], 0),
+                                        XEXP (operands[1], 1));
+  rtx_insn *compare_seq = get_insns ();
+  end_sequence ();
+
+  if (op2_traps)
+    op2 = gen_rtx_UNSPEC (mode, gen_rtvec (1, op2), UNSPEC_APX_CFCMOV);
+  if (op3_traps)
+    op3 = gen_rtx_UNSPEC (mode, gen_rtvec (1, op3), UNSPEC_APX_CFCMOV);
+
+  emit_insn (compare_seq);
+
+  if (op2_traps && op3_traps)
+    {
+      /* For "if (test) x = *a; else x = *b", generate 2 cfcmov: the first
+         keeps the destination when the test fails, the second keeps it
+         when the test succeeds.  */
+      emit_insn (gen_rtx_SET (operands[0],
+                              gen_rtx_IF_THEN_ELSE (mode, compare_op,
+                                                    op2, operands[0])));
+      emit_insn (gen_rtx_SET (operands[0],
+                              gen_rtx_IF_THEN_ELSE (mode, compare_op,
+                                                    operands[0], op3)));
+    }
+  else
+    /* For conditional load of one mem, like
+       "if (test) x = *a; else x = b/0" and
+       "if (test) x = b/0; else x = *b".  */
+    emit_insn (gen_rtx_SET (operands[0],
+                            gen_rtx_IF_THEN_ELSE (mode, compare_op,
+                                                  op2, op3)));
+}
+
bool
ix86_expand_int_movcc (rtx operands[])
{
diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index 35542b28936..a11c800448b 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -143,8 +143,10 @@ enum apx_features {
apx_nf = 1 << 4,
apx_ccmp = 1 << 5,
apx_zu = 1 << 6,
+ apx_cfcmov = 1 << 7,
apx_all = apx_egpr | apx_push2pop2 | apx_ndd
- | apx_ppx | apx_nf | apx_ccmp | apx_zu,
+ | apx_ppx | apx_nf | apx_ccmp | apx_zu
+ | apx_cfcmov,
};
#endif
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index c1f9147769c..eacd38b5bc5 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -153,6 +153,7 @@ extern bool ix86_match_ccmode (rtx, machine_mode);
extern bool ix86_match_ptest_ccmode (rtx);
extern void ix86_expand_branch (enum rtx_code, rtx, rtx, rtx);
extern void ix86_expand_setcc (rtx, enum rtx_code, rtx, rtx);
+extern void ix86_expand_int_cfmovcc (rtx[]);
extern bool ix86_expand_int_movcc (rtx[]);
extern bool ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 473e4cbf10e..5ec5d81bf10 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22480,10 +22480,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int
outer_code_i, int opno,
*total = COSTS_N_INSNS (1);
if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
*total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
- if (!REG_P (XEXP (x, 1)))
- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
- if (!REG_P (XEXP (x, 2)))
- *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
+ rtx op1, op2;
+ op1 = XEXP (x, 1);
+ op2 = XEXP (x, 2);
+ /* Handle UNSPEC_APX_CFCMOV for cfcmov. */
+ if (GET_CODE (op1) == UNSPEC && XINT (op1, 1) == UNSPEC_APX_CFCMOV)
+ op1 = XVECEXP (op1, 0, 0);
+ if (GET_CODE (op2) == UNSPEC && XINT (op2, 1) == UNSPEC_APX_CFCMOV)
+ op2 = XVECEXP (op2, 0, 0);
+ if (!REG_P (op1))
+ *total += rtx_cost (op1, mode, code, 1, speed);
+ if (!REG_P (op2))
+ *total += rtx_cost (op2, mode, code, 2, speed);
return true;
}
return false;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2dcd8803a08..3c9e8d86dd5 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -58,6 +58,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If
not, see
#define TARGET_APX_NF (ix86_apx_features & apx_nf)
#define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp)
#define TARGET_APX_ZU (ix86_apx_features & apx_zu)
+#define TARGET_APX_CFCMOV (ix86_apx_features & apx_cfcmov)
#include "config/vxworks-dummy.h"
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e4d1c56ea54..aa2ebe4cc72 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -224,6 +224,9 @@
;; For APX CCMP support
;; DFV = default flag value
UNSPEC_APX_DFV
+
+ ;; For APX CFCMOV support
+ UNSPEC_APX_CFCMOV
])
(define_c_enum "unspecv" [
@@ -582,7 +585,7 @@
noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
- vaes_avx512vl,noapx_nf,avx10_2"
+ vaes_avx512vl,noapx_nf,avx10_2,apx_cfcmov"
(const_string "base"))
;; The (bounding maximum) length of an instruction immediate.
@@ -993,6 +996,7 @@
(eq_attr "mmx_isa" "avx")
(symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
(eq_attr "isa" "noapx_nf") (symbol_ref "!TARGET_APX_NF")
+ (eq_attr "isa" "apx_cfcmov") (symbol_ref "TARGET_APX_CFCMOV")
]
(const_int 1)))
@@ -25827,6 +25831,28 @@
""
"if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+(define_expand "maskload<mode><mode>" ;; Conditional load into a register.
+ [(set (match_operand:SWI248 0 "register_operand")
+ (if_then_else:SWI248 (match_operand 1 "comparison_operator")
+ (match_operand:SWI248 2 "nonimm_or_0_operand")
+ (match_operand:SWI248 3 "nonimm_or_0_operand")))]
+ "TARGET_APX_CFCMOV" ;; Enabled only with the APX cfcmov feature bit.
+ {
+ ix86_expand_int_cfmovcc (operands); /* Emits the compare and cfcmov(s).  */
+ DONE;
+ })
+
+(define_expand "maskstore<mode><mode>" ;; Conditional store of a register.
+ [(set (match_operand:SWI248 0 "memory_operand")
+ (if_then_else:SWI248 (match_operand 1 "comparison_operator")
+ (match_operand:SWI248 2 "register_operand")
+ (match_operand:SWI248 3 "memory_operand")))] ;; Must equal operand 0.
+ "TARGET_APX_CFCMOV" ;; Enabled only with the APX cfcmov feature bit.
+ {
+ ix86_expand_int_cfmovcc (operands); /* Asserts operand 0 matches op 3.  */
+ DONE;
+ })
+
;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
;; the register first winds up with `sbbl $0,reg', which is also weird.
;; So just document what we're doing explicitly.
@@ -25928,6 +25954,52 @@
(set (match_dup 0)
(neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
+(define_insn "*cfmov<mode>cc" ;; Faulting load in the "then" arm.
+ [(set (match_operand:SWI248 0 "register_operand" "=r,r")
+ (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (unspec:SWI248
+ [(match_operand:SWI248 2 "memory_operand" "m,m")]
+ UNSPEC_APX_CFCMOV) ;; Wrapper added by ix86_expand_int_cfmovcc.
+ (match_operand:SWI248 3 "reg_or_0_operand" "C,r")))] ;; Else: 0 or reg.
+ "TARGET_CMOVE && TARGET_APX_CFCMOV"
+ "@
+ cfcmov%O2%C1\t{%2, %0|%0, %2}
+ cfcmov%O2%C1\t{%2, %3, %0|%0, %3, %2}"
+ [(set_attr "isa" "*,apx_ndd") ;; 3-operand alternative is gated on apx_ndd.
+ (set_attr "type" "icmov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*cfmov<mode>cc_2" ;; Memory in the "else" arm; alt 2 is a store.
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,r,m")
+ (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SWI248 2 "reg_or_0_operand" "r,C,r")
+ (unspec:SWI248
+ [(match_operand:SWI248 3 "memory_operand" "m,m,0")] ;; "0": same as dest.
+ UNSPEC_APX_CFCMOV)))]
+ "TARGET_CMOVE && TARGET_APX_CFCMOV"
+ "@
+ cfcmov%O2%c1\t{%3, %2, %0|%0, %2, %3}
+ cfcmov%O2%c1\t{%3, %0|%0, %3}
+ cfcmov%O2%C1\t{%2, %0|%0, %2}"
+ [(set_attr "isa" "apx_ndd,*,*") ;; NOTE(review): loads use %c1, store %C1;
+ (set_attr "type" "icmov") ;; presumably %c1 prints the reversed condition
+ (set_attr "prefix" "evex") ;; -- confirm against ix86_print_operand.
+ (set_attr "mode" "<MODE>")])
+
+;; Register source with a zero "else" arm.  Neither source is a memory
+;; operand, so there is no UNSPEC_APX_CFCMOV wrapper here.  The insn is
+;; still a cfcmov, so it carries the same "evex" prefix attribute as the
+;; other cfcmov patterns.
+(define_insn "*cfmov<mode>ccz"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                              [(reg FLAGS_REG) (const_int 0)])
+          (match_operand:SWI248 2 "register_operand" "r")
+          (match_operand:SWI248 3 "const0_operand" "C")))]
+  "TARGET_CMOVE && TARGET_APX_CFCMOV"
+  "cfcmov%O2%C1\t{%2, %0|%0, %2}"
+  [(set_attr "type" "icmov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
(define_insn "*mov<mode>cc_noc"
[(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
(if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 64c295d344c..03fcd14918f 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1345,6 +1345,9 @@ Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7)
EnumValue
Enum(apx_features) String(zu) Value(apx_zu) Set(8)
+EnumValue
+Enum(apx_features) String(cfcmov) Value(apx_cfcmov) Set(9)
+
EnumValue
Enum(apx_features) String(all) Value(apx_all) Set(1)
diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
new file mode 100644
index 00000000000..4a1fb91b24c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-1.c
@@ -0,0 +1,73 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -mapxf" } */
+
+/* { dg-final { scan-assembler-times "cfcmovne" 1 } } */
+/* { dg-final { scan-assembler-times "cfcmovg" 2} } */
+/* { dg-final { scan-assembler-times "cfcmove" 1 } } */
+/* { dg-final { scan-assembler-times "cfcmovl" 2 } } */
+/* { dg-final { scan-assembler-times "cfcmovle" 1 } } */
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_store (int a, int b, int c, int d, int *arr)
+{
+ if (a != b)
+ *arr = c; /* Conditional store: *arr is written only when a != b.  */
+ return d; /* The return value does not depend on the condition.  */
+
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_ndd (int a, int b, int c, int *p)
+{
+ if (a > b)
+ return *p; /* Possibly faulting load, performed only when a > b.  */
+ return c; /* Otherwise the plain value c is returned.  */
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_2_trap (int a, int b, int *c, int *p)
+{
+ if (a > b)
+ return *p; /* Both arms dereference a pointer, so either load may  */
+ return *c; /* fault; exactly one of them is executed.  */
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_zero (int a, int b, int c)
+{
+ int sum = 0;
+ if (a == b)
+ return c; /* No memory access: selects c when a == b ...  */
+ return sum; /* ... and the constant 0 otherwise.  */
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_mem (int a, int b, int *p)
+{
+ int sum = 0;
+ if (a < b )
+ sum = *p; /* Possibly faulting load, performed only when a < b.  */
+ return sum; /* 0 when the load was skipped.  */
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_arith_1 (int a, int b, int c, int *p)
+{
+ int sum = 0;
+ if (a > b)
+ sum = *p; /* "Then" arm: possibly faulting load.  */
+ else
+ sum = a + c; /* "Else" arm: computed value, no memory access.  */
+ return sum + 1;
+}
+
+__attribute__((noinline, noclone, target("apxf")))
+int cfc_load_arith_2 (int a, int b, int c, int *p)
+{
+ int sum = 0;
+ if (a > b)
+ sum = a + c; /* "Then" arm: computed value, no memory access.  */
+ else
+ sum = *p; /* "Else" arm: possibly faulting load.  */
+ return sum + 1;
+}
diff --git a/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
new file mode 100644
index 00000000000..2b1660f64fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-cfcmov-2.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target apxf } */
+/* { dg-options "-mapxf -march=x86-64 -O3" } */
+
+#include "apx-cfcmov-1.c"
+
+extern void abort (void);
+
+int main ()
+{
+  /* Nothing can be executed without APX support; count that as a pass.  */
+  if (!__builtin_cpu_supports ("apxf"))
+    return 0;
+
+  int arr = 6;
+  int arr1 = 5;
+
+  /* 1 != 2, so 3 must be stored to arr and 4 returned.  A mismatch in
+     either value is a failure, hence "||" rather than "&&".  */
+  int res = cfc_store (1, 2, 3, 4, &arr);
+  if (arr != 3 || res != 4)
+    abort ();
+
+  /* 2 > 1: the load arm is taken and yields arr, which is now 3.  */
+  res = cfc_load_ndd (2, 1, 2, &arr);
+  if (res != 3)
+    abort ();
+
+  /* 1 > 2 is false: the second load (*c == arr1 == 5) is selected.  */
+  res = cfc_load_2_trap (1, 2, &arr1, &arr);
+  if (res != 5)
+    abort ();
+
+  /* Exercise both arms of the register-or-zero select.  */
+  res = cfc_load_zero (1, 1, 3);
+  if (res != 3)
+    abort ();
+  res = cfc_load_zero (1, 2, 3);
+  if (res != 0)
+    abort ();
+
+  /* 2 < 1 is false: the load is skipped and the initial 0 is returned.  */
+  res = cfc_load_mem (2, 1, &arr);
+  if (res != 0)
+    abort ();
+
+  /* 1 > 2 is false: sum = 1 + 3, result 5.  */
+  res = cfc_load_arith_1 (1, 2, 3, &arr);
+  if (res != 5)
+    abort ();
+
+  /* 2 > 1 is true: sum = 2 + 3, result 6.  */
+  res = cfc_load_arith_2 (2, 1, 3, &arr);
+  if (res != 6)
+    abort ();
+
+  return 0;
+}
+
--
2.31.1