https://gcc.gnu.org/g:c989e59fc99d994159114304d4e715c72bedff0a

commit r15-1058-gc989e59fc99d994159114304d4e715c72bedff0a
Author: Hongyu Wang <hongyu.w...@intel.com>
Date:   Wed Mar 27 10:13:06 2024 +0800

    [APX CCMP] Support APX CCMP
    
    The APX CCMP feature implements a conditional compare, which executes the
    compare only when EFLAGS matches a given condition.
    
    CCMP introduces a default flags value (dfv): when the conditional compare
    does not execute, it sets the flags directly according to dfv.
    
    The instruction goes like
    
    ccmpeq {dfv=sf,of,cf,zf}  %rax, %r16
    
    This instruction tests whether EFLAGS matches condition code EQ. If it
    does, %rax and %r16 are compared like a legacy cmp. If not, EFLAGS is
    updated according to dfv, which here means SF, OF, CF and ZF are set.
    PF will be set according to CF in dfv, and AF will always be
    cleared.
    
    The dfv part can be any combination of sf,of,cf,zf, such as {dfv=cf,zf},
    which sets only CF and ZF and clears the others, or {dfv=}, which clears
    all EFLAGS.
    
    To enable CCMP, we implemented the target hooks TARGET_GEN_CCMP_FIRST and
    TARGET_GEN_CCMP_NEXT to reuse the current ccmp infrastructure. We also
    extended the cstorem4 optab to support storing different CCmodes to fit
    the current ccmp infrastructure.
    
    gcc/ChangeLog:
    
            * config/i386/i386-expand.cc (ix86_gen_ccmp_first): New function
            that tests if the first compare can be generated.
            (ix86_gen_ccmp_next): New function to emit a single compare and ccmp
            sequence.
            * config/i386/i386-opts.h (enum apx_features): Add apx_ccmp.
            * config/i386/i386-protos.h (ix86_gen_ccmp_first): New proto
            declare.
            (ix86_gen_ccmp_next): Likewise.
            (ix86_get_flags_cc): Likewise.
            * config/i386/i386.cc (ix86_flags_cc): New enum.
            (ix86_ccmp_dfv_mapping): New string array to map conditional
            code to dfv.
            (ix86_print_operand): Handle special dfv flag for CCMP.
            (ix86_get_flags_cc): New function to return x86 CC enum.
            (TARGET_GEN_CCMP_FIRST): Define.
            (TARGET_GEN_CCMP_NEXT): Likewise.
            * config/i386/i386.h (TARGET_APX_CCMP): Define.
            * config/i386/i386.md (@ccmp<mode>): New define_insn to support
            ccmp.
            (UNSPEC_APX_DFV): New unspec for ccmp dfv.
            (ALL_CC): New mode iterator.
            (cstorecc4): Change to ...
            (cstore<mode>4) ... this, use ALL_CC to loop through all
            available CCmodes.
            * config/i386/i386.opt (apx_ccmp): Add enum value for ccmp.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/apx-ccmp-1.c: New compile test.
            * gcc.target/i386/apx-ccmp-2.c: New runtime test.

Diff:
---
 gcc/config/i386/i386-expand.cc             | 121 +++++++++++++++++++++++++++++
 gcc/config/i386/i386-opts.h                |   6 +-
 gcc/config/i386/i386-protos.h              |   5 ++
 gcc/config/i386/i386.cc                    |  50 ++++++++++++
 gcc/config/i386/i386.h                     |   1 +
 gcc/config/i386/i386.md                    |  35 ++++++++-
 gcc/config/i386/i386.opt                   |   3 +
 gcc/testsuite/gcc.target/i386/apx-ccmp-1.c |  63 +++++++++++++++
 gcc/testsuite/gcc.target/i386/apx-ccmp-2.c |  57 ++++++++++++++
 9 files changed, 337 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 56d29c15f9a..5353d761384 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -25352,4 +25352,125 @@ ix86_expand_fast_convert_bf_to_sf (rtx val)
   return ret;
 }
 
+rtx
+ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
+                       rtx_code code, tree treeop0, tree treeop1)
+{
+  if (!TARGET_APX_CCMP)
+    return NULL_RTX;
+
+  rtx op0, op1, res;
+  machine_mode op_mode;
+
+  start_sequence ();
+  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+
+  op_mode = GET_MODE (op0);
+  if (op_mode == VOIDmode)
+    op_mode = GET_MODE (op1);
+
+  if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode
+       || op_mode == QImode))
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+
+  /* Canonicalize the operands according to mode.  */
+  if (!nonimmediate_operand (op0, op_mode))
+    op0 = force_reg (op_mode, op0);
+  if (!x86_64_general_operand (op1, op_mode))
+    op1 = force_reg (op_mode, op1);
+
+  *prep_seq = get_insns ();
+  end_sequence ();
+
+  start_sequence ();
+
+  res = ix86_expand_compare (code, op0, op1);
+
+  if (!res)
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+  *gen_seq = get_insns ();
+  end_sequence ();
+
+  return res;
+}
+
+rtx
+ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
+                      rtx_code cmp_code, tree treeop0, tree treeop1,
+                      rtx_code bit_code)
+{
+  if (!TARGET_APX_CCMP)
+    return NULL_RTX;
+
+  rtx op0, op1, target;
+  machine_mode op_mode, cmp_mode, cc_mode = CCmode;
+  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
+  insn_code icode;
+  rtx_code prev_code;
+  struct expand_operand ops[5];
+  int dfv;
+
+  push_to_sequence (*prep_seq);
+  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+
+  cmp_mode = op_mode = GET_MODE (op0);
+
+  if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode
+       || op_mode == QImode))
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+
+  icode = code_for_ccmp (op_mode);
+
+  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
+  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
+  if (!op0 || !op1)
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+
+  *prep_seq = get_insns ();
+  end_sequence ();
+
+  target = gen_rtx_REG (cc_mode, FLAGS_REG);
+  dfv = ix86_get_flags_cc ((rtx_code) cmp_code);
+
+  prev_code = GET_CODE (prev);
+
+  if (bit_code != AND)
+    prev_code = reverse_condition (prev_code);
+  else
+    dfv = (int)(dfv ^ 1);
+
+  prev = gen_rtx_fmt_ee (prev_code, VOIDmode, XEXP (prev, 0),
+                        const0_rtx);
+
+  create_fixed_operand (&ops[0], target);
+  create_fixed_operand (&ops[1], prev);
+  create_fixed_operand (&ops[2], op0);
+  create_fixed_operand (&ops[3], op1);
+  create_fixed_operand (&ops[4], GEN_INT (dfv));
+
+  push_to_sequence (*gen_seq);
+  if (!maybe_expand_insn (icode, 5, ops))
+    {
+      end_sequence ();
+      return NULL_RTX;
+    }
+
+  *gen_seq = get_insns ();
+  end_sequence ();
+
+  return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
+}
+
 #include "gt-i386-expand.h"
diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index 60176ce609f..5fcc4927978 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -140,8 +140,10 @@ enum apx_features {
   apx_push2pop2 = 1 << 1,
   apx_ndd = 1 << 2,
   apx_ppx = 1 << 3,
-  apx_nf = 1<< 4,
-  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
+  apx_nf = 1 << 4,
+  apx_ccmp = 1 << 5,
+  apx_all = apx_egpr | apx_push2pop2 | apx_ndd
+           | apx_ppx | apx_nf | apx_ccmp,
 };
 
 #endif
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 90712769200..aa50b897b2b 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -243,6 +243,11 @@ extern void ix86_expand_atomic_fetch_op_loop (rtx, rtx, rtx, enum rtx_code,
 extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx,
                                      bool, rtx_code_label *);
 extern rtx ix86_expand_fast_convert_bf_to_sf (rtx);
+extern rtx ix86_gen_ccmp_first (rtx_insn **, rtx_insn **, enum rtx_code,
+                               tree, tree);
+extern rtx ix86_gen_ccmp_next (rtx_insn **, rtx_insn **, rtx,
+                              enum rtx_code, tree, tree, enum rtx_code);
+extern int ix86_get_flags_cc (enum rtx_code);
 extern rtx ix86_memtag_untagged_pointer (rtx, rtx);
 extern bool ix86_memtag_can_tag_addresses (void);
 
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index a9d62c84c52..4126ab24a79 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -433,6 +433,22 @@ static bool i386_asm_output_addr_const_extra (FILE *, rtx);
 static bool ix86_can_inline_p (tree, tree);
 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
 
+typedef enum ix86_flags_cc
+{
+  X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
+  X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
+  X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
+  X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
+} ix86_cc;
+
+static const char *ix86_ccmp_dfv_mapping[] =
+{
+  "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
+  "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
+  "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
+  "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
+};
+
 
 /* Whether -mtune= or -march= were specified */
 int ix86_tune_defaulted;
@@ -13690,6 +13706,7 @@ print_reg (rtx x, int code, FILE *file)
    M -- print addr32 prefix for TARGET_X32 with VSIB address.
    ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
    N -- print maskz if it's constant 0 operand.
+   G -- print embedded flag for ccmp/ctest.
  */
 
 void
@@ -14083,6 +14100,14 @@ ix86_print_operand (FILE *file, rtx x, int code)
                              file);
          return;
 
+       case 'G':
+         {
+           int dfv = INTVAL (x);
+           const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
+           fputs (dfv_suffix, file);
+         }
+         return;
+
        case 'H':
          if (!offsettable_memref_p (x))
            {
@@ -16466,6 +16491,24 @@ ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
   return val.to_shwi ();
 }
 
+int ix86_get_flags_cc (rtx_code code)
+{
+  switch (code)
+    {
+      case NE: return X86_CCNE;
+      case EQ: return X86_CCE;
+      case GE: return X86_CCNL;
+      case GT: return X86_CCNLE;
+      case LE: return X86_CCLE;
+      case LT: return X86_CCL;
+      case GEU: return X86_CCNB;
+      case GTU: return X86_CCNBE;
+      case LEU: return X86_CCBE;
+      case LTU: return X86_CCB;
+      default: return -1;
+    }
+}
+
 /* Return TRUE or FALSE depending on whether the first SET in INSN
    has source and destination with matching CC modes, and that the
    CC mode is at least as constrained as REQ_MODE.  */
@@ -26951,6 +26994,13 @@ ix86_libgcc_floating_mode_supported_p
 #undef TARGET_MEMTAG_TAG_SIZE
 #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
 
+#undef TARGET_GEN_CCMP_FIRST
+#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
+
+#undef TARGET_GEN_CCMP_NEXT
+#define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
+
+
 static bool
 ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
 {
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 969391d3013..7051c6c13e4 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -56,6 +56,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
 #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
 #define TARGET_APX_NF (ix86_apx_features & apx_nf)
+#define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ef83984d00e..ffcf63e1cba 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -217,6 +217,10 @@
 
   ;; For APX PPX support
   UNSPEC_APX_PPX
+
+  ;; For APX CCMP support
+  ;; DFV = default flag value
+  UNSPEC_APX_DFV
 ])
 
 (define_c_enum "unspecv" [
@@ -1505,6 +1509,25 @@
   DONE;
 })
 
+(define_insn "@ccmp<mode>"
+ [(set (match_operand:CC 0 "flags_reg_operand")
+       (if_then_else:CC
+        (match_operator 1 "comparison_operator"
+         [(reg:CC FLAGS_REG) (const_int 0)])
+       (compare:CC
+         (minus:SWI (match_operand:SWI 2 "nonimmediate_operand" "<r>m,<r>")
+                    (match_operand:SWI 3 "<general_operand>" "<r><i>,<r><m>"))
+         (const_int 0))
+       (unspec:SI
+         [(match_operand:SI 4 "const_0_to_15_operand")]
+         UNSPEC_APX_DFV)))]
+ "TARGET_APX_CCMP"
+ "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"
+ [(set_attr "type" "icmp")
+  (set_attr "mode" "<MODE>")
+  (set_attr "length_immediate" "1")
+  (set_attr "prefix" "evex")])
+
 (define_expand "@cmp<mode>_1"
   [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
@@ -1851,10 +1874,18 @@
   DONE;
 })
 
-(define_expand "cstorecc4"
+;; For conditional compare, the middle-end hook will convert
+;; CCmode to sub-CCmode using SELECT_CC_MODE macro and try
+;; to find cstore<submodes> in optab. Add ALL_CC to support
+;; the cstore after ccmp sequence.
+
+(define_mode_iterator ALL_CC
+ [CCGC CCGOC CCNO CCGZ CCA CCC CCO CCP CCS CCZ CC])
+
+(define_expand "cstore<mode>4"
   [(set (match_operand:QI 0 "register_operand")
               (match_operator 1 "comparison_operator"
-               [(match_operand 2 "flags_reg_operand")
+               [(match_operand:ALL_CC 2 "flags_reg_operand")
                 (match_operand 3 "const0_operand")]))]
   ""
 {
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index b6f28a2b4bd..7017cc87cec 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1339,6 +1339,9 @@ Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
 EnumValue
 Enum(apx_features) String(nf) Value(apx_nf) Set(6)
 
+EnumValue
+Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
new file mode 100644
index 00000000000..5a2dad89f1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
@@ -0,0 +1,63 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mapx-features=ccmp" } */
+
+int
+f1 (int a)
+{
+  return a < 17 || a == 32;
+}
+
+int
+f2 (int a)
+{
+  return a > 33 || a == 18;
+}
+
+int
+f3 (int a, int b)
+{
+  return a != 19 && b > 34;
+}
+
+int
+f4 (int a, int b)
+{
+  return a < 35 && b == 20;
+}
+
+int
+f5 (short a)
+{
+  return a == 0 || a == 5;
+}
+
+int
+f6 (long long a)
+{
+  return a == 6 || a == 0;
+}
+
+int
+f7 (char a, char b)
+{
+  return a > 0 && b <= 7;
+}
+
+int
+f8 (int a, int b)
+{
+  return a == 9 && b > 0;
+}
+
+int
+f9 (int a, int b)
+{
+  a += b;
+  return a == 3 || a == 0;
+}
+
+/* { dg-final { scan-assembler-times "ccmpg" 2 } } */
+/* { dg-final { scan-assembler-times "ccmple" 2 } } */
+/* { dg-final { scan-assembler-times "ccmpne" 4 } } */
+/* { dg-final { scan-assembler-times "ccmpe" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
new file mode 100644
index 00000000000..30a1c216c1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
@@ -0,0 +1,57 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target apxf } */
+/* { dg-options "-O3 -mno-apxf" } */
+
+__attribute__((noinline, noclone, target("apxf")))
+int foo_apx(int a, int b, int c, int d)
+{
+  int sum = a;
+
+  if (a != c)
+    {
+      c += d;
+      a += b;
+      sum += a + c;
+      if (b != d && sum < c || sum > d)
+       {
+         b += d;
+         sum += b;
+       }
+    }
+
+  return sum;
+}
+
+__attribute__((noinline, noclone, target("no-apxf")))
+int foo_noapx(int a, int b, int c, int d)
+{
+  int sum = a;
+
+  if (a != c)
+    {
+      c += d;
+      a += b;
+      sum += a + c;
+      if (b != d && sum < c || sum > d)
+       {
+         b += d;
+         sum += b;
+       }
+    }
+
+  return sum;
+}
+
+int main (void)
+{
+  if (!__builtin_cpu_supports ("apxf"))
+    return 0;
+
+  int val1 = foo_noapx (23, 17, 32, 44);
+  int val2 = foo_apx (23, 17, 32, 44);
+
+  if (val1 != val2)
+    __builtin_abort ();
+
+  return 0;
+}

Reply via email to