https://gcc.gnu.org/g:576bd309ded9dfe258023f26924c064a7bf12875

commit r15-3185-g576bd309ded9dfe258023f26924c064a7bf12875
Author: Zhang, Jun <jun.zh...@intel.com>
Date:   Mon Aug 26 10:53:54 2024 +0800

    AVX10.2: Support compare instructions
    
    gcc/ChangeLog:
    
            * config/i386/i386-expand.cc
            (ix86_ssecom_setcc): Mention behavior change on flags.
            (ix86_expand_sse_comi): Handle AVX10.2 behavior.
            (ix86_expand_sse_comi_round): Ditto.
            (ix86_expand_round_builtin): Ditto.
            (ix86_expand_builtin): Change function call.
            * config/i386/i386.md (UNSPEC_COMX): New unspec.
            * config/i386/sse.md
            (avx10_2_v<unord>comx<ssemodesuffix><round_saeonly_name>): New.
            (<sse>_<unord>comi<round_saeonly_name>): Add HFmode.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/avx10_2-compare-1.c: New test.
    
    Co-authored-by: Haochen Jiang <haochen.ji...@intel.com>
    Co-authored-by: Hongtao Liu <hongtao....@intel.com>

Diff:
---
 gcc/config/i386/i386-expand.cc                    | 170 ++++++++++++++++++----
 gcc/config/i386/i386.md                           |   1 +
 gcc/config/i386/sse.md                            |  18 ++-
 gcc/testsuite/gcc.target/i386/avx10_2-compare-1.c |  21 +++
 4 files changed, 183 insertions(+), 27 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 56fc433e9933..d692008ffe7e 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -10601,7 +10601,9 @@ ix86_ssecom_setcc (const enum rtx_code comparison,
   rtx_code_label *label = NULL;
 
   /* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient
-     with NAN operands.  */
+     with NAN operands.
+     Under TARGET_AVX10_2_256, VCOMX/VUCOMX are generated instead of
+     COMI/UCOMI.  VCOMX/VUCOMX will not set ZF for NAN operands.  */
   if (check_unordered)
     {
       gcc_assert (comparison == EQ || comparison == NE);
@@ -10640,7 +10642,7 @@ ix86_ssecom_setcc (const enum rtx_code comparison,
 
 static rtx
 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
-                     rtx target)
+                     rtx target, bool comx_ok)
 {
   rtx pat, set_dst;
   tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -10673,11 +10675,13 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
     case GE:
       break;
     case EQ:
-      check_unordered = true;
+      if (!TARGET_AVX10_2_256 || !comx_ok)
+       check_unordered = true;
       mode = CCZmode;
       break;
     case NE:
-      check_unordered = true;
+      if (!TARGET_AVX10_2_256 || !comx_ok)
+       check_unordered = true;
       mode = CCZmode;
       const_val = const1_rtx;
       break;
@@ -10696,6 +10700,28 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
       || !insn_p->operand[1].predicate (op1, mode1))
     op1 = copy_to_mode_reg (mode1, op1);
 
+  if ((comparison == EQ || comparison == NE)
+      && TARGET_AVX10_2_256 && comx_ok)
+    {
+      switch (icode)
+       {
+       case CODE_FOR_sse_comi:
+         icode = CODE_FOR_avx10_2_comxsf;
+         break;
+       case CODE_FOR_sse_ucomi:
+         icode = CODE_FOR_avx10_2_ucomxsf;
+         break;
+       case CODE_FOR_sse2_comi:
+         icode = CODE_FOR_avx10_2_comxdf;
+         break;
+       case CODE_FOR_sse2_ucomi:
+         icode = CODE_FOR_avx10_2_ucomxdf;
+         break;
+
+       default:
+         gcc_unreachable ();
+       }
+    }
   pat = GEN_FCN (icode) (op0, op1);
   if (! pat)
     return 0;
@@ -12190,7 +12216,7 @@ ix86_erase_embedded_rounding (rtx pat)
    with rounding.  */
 static rtx
 ix86_expand_sse_comi_round (const struct builtin_description *d,
-                           tree exp, rtx target)
+                           tree exp, rtx target, bool comx_ok)
 {
   rtx pat, set_dst;
   tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -12252,6 +12278,7 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
     op1 = safe_vector_operand (op1, mode1);
 
   enum rtx_code comparison = comparisons[INTVAL (op2)];
+  enum rtx_code orig_comp = comparison;
   bool ordered = ordereds[INTVAL (op2)];
   bool non_signaling = non_signalings[INTVAL (op2)];
   rtx const_val = const0_rtx;
@@ -12263,10 +12290,21 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
     case ORDERED:
       if (!ordered)
        {
-         /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US.  */
-         if (!non_signaling)
-           ordered = true;
-         mode = CCSmode;
+         if (TARGET_AVX10_2_256 && comx_ok)
+           {
+             /* Unlike VCOMI{SH,SS,SD}, VCOMX{SH,SS,SD} will set SF
+                differently. So directly return true here.  */
+             target = gen_reg_rtx (SImode);
+             emit_move_insn (target, const1_rtx);
+             return target;
+           }
+         else
+           {
+             /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US.  */
+             if (!non_signaling)
+               ordered = true;
+             mode = CCSmode;
+           }
        }
       else
        {
@@ -12280,10 +12318,21 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
     case UNORDERED:
       if (ordered)
        {
-         /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS.  */
-         if (non_signaling)
-           ordered = false;
-         mode = CCSmode;
+         if (TARGET_AVX10_2_256 && comx_ok)
+           {
+             /* Unlike VCOMI{SH,SS,SD}, VCOMX{SH,SS,SD} will set SF
+                differently. So directly return false here.  */
+             target = gen_reg_rtx (SImode);
+             emit_move_insn (target, const0_rtx);
+             return target;
+           }
+         else
+           {
+             /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS.  */
+             if (non_signaling)
+               ordered = false;
+             mode = CCSmode;
+           }
        }
       else
        {
@@ -12314,17 +12363,23 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
       if (ordered == non_signaling)
        ordered = !ordered;
       break;
-    case EQ:
       /* NB: COMI/UCOMI will set ZF with NAN operands.  Use CCZmode for
-        _CMP_EQ_OQ/_CMP_EQ_OS.  */
-      check_unordered = true;
+        _CMP_EQ_OQ/_CMP_EQ_OS.
+        Under TARGET_AVX10_2_256, VCOMX/VUCOMX are always generated instead
+        of COMI/UCOMI, VCOMX/VUCOMX will not set ZF with NAN.  */
+    case EQ:
+      if (!TARGET_AVX10_2_256 || !comx_ok)
+       check_unordered = true;
       mode = CCZmode;
       break;
     case NE:
       /* NB: COMI/UCOMI will set ZF with NAN operands.  Use CCZmode for
-        _CMP_NEQ_UQ/_CMP_NEQ_US.  */
+        _CMP_NEQ_UQ/_CMP_NEQ_US.
+        Under TARGET_AVX10_2_256, VCOMX/VUCOMX are always generated instead
+        of COMI/UCOMI, VCOMX/VUCOMX will not set ZF with NAN.  */
       gcc_assert (!ordered);
-      check_unordered = true;
+      if (!TARGET_AVX10_2_256 || !comx_ok)
+       check_unordered = true;
       mode = CCZmode;
       const_val = const1_rtx;
       break;
@@ -12343,14 +12398,77 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
       || !insn_p->operand[1].predicate (op1, mode1))
     op1 = copy_to_mode_reg (mode1, op1);
 
+    /* Generate comx instead of comi when EQ/NE to avoid NAN checks.
+       Use orig_comp to exclude ORDERED/UNORDERED cases.  */
+  if ((orig_comp == EQ || orig_comp == NE)
+      && TARGET_AVX10_2_256 && comx_ok)
+    {
+      switch (icode)
+       {
+       case CODE_FOR_avx512fp16_comi_round:
+         icode = CODE_FOR_avx10_2_comxhf_round;
+         break;
+       case CODE_FOR_sse_comi_round:
+         icode = CODE_FOR_avx10_2_comxsf_round;
+         break;
+       case CODE_FOR_sse2_comi_round:
+         icode = CODE_FOR_avx10_2_comxdf_round;
+         break;
+
+       default:
+         break;
+       }
+    }
+
+  /* Generate comi instead of comx when UNEQ/LTGT to avoid NAN checks.  */
+  if ((comparison == UNEQ || comparison == LTGT)
+       && TARGET_AVX10_2_256 && comx_ok)
+    {
+      switch (icode)
+       {
+       case CODE_FOR_avx10_2_comxhf_round:
+         icode = CODE_FOR_avx512fp16_comi_round;
+         break;
+       case CODE_FOR_avx10_2_comxsf_round:
+         icode = CODE_FOR_sse_comi_round;
+         break;
+       case CODE_FOR_avx10_2_comxdf_round:
+         icode = CODE_FOR_sse2_comi_round;
+         break;
+
+       default:
+         break;
+       }
+    }
+
   /*
-     1. COMI: ordered and signaling.
-     2. UCOMI: unordered and non-signaling.
+     1. COMI/VCOMX: ordered and signaling.
+     2. UCOMI/VUCOMX: unordered and non-signaling.
    */
   if (non_signaling)
-    icode = (icode == CODE_FOR_sse_comi_round
-            ? CODE_FOR_sse_ucomi_round
-            : CODE_FOR_sse2_ucomi_round);
+    switch (icode)
+      {
+      case CODE_FOR_sse_comi_round:
+       icode = CODE_FOR_sse_ucomi_round;
+       break;
+      case CODE_FOR_sse2_comi_round:
+       icode = CODE_FOR_sse2_ucomi_round;
+       break;
+      case CODE_FOR_avx512fp16_comi_round:
+       icode = CODE_FOR_avx512fp16_ucomi_round;
+       break;
+      case CODE_FOR_avx10_2_comxsf_round:
+       icode = CODE_FOR_avx10_2_ucomxsf_round;
+       break;
+      case CODE_FOR_avx10_2_comxhf_round:
+       icode = CODE_FOR_avx10_2_ucomxhf_round;
+       break;
+      case CODE_FOR_avx10_2_comxdf_round:
+       icode = CODE_FOR_avx10_2_ucomxdf_round;
+       break;
+      default:
+       gcc_unreachable ();
+      }
 
   pat = GEN_FCN (icode) (op0, op1, op3);
   if (! pat)
@@ -12487,7 +12605,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
       break;
     case INT_FTYPE_V4SF_V4SF_INT_INT:
     case INT_FTYPE_V2DF_V2DF_INT_INT:
-      return ix86_expand_sse_comi_round (d, exp, target);
+      return ix86_expand_sse_comi_round (d, exp, target, true);
     case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI_INT:
     case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
     case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
@@ -15628,7 +15746,7 @@ rdseed_step:
          case IX86_BUILTIN_VCOMSBF16GE:
          case IX86_BUILTIN_VCOMSBF16LT:
          case IX86_BUILTIN_VCOMSBF16LE:
-          return ix86_expand_sse_comi (bdesc_args + i, exp, target);
+           return ix86_expand_sse_comi (bdesc_args + i, exp, target, false);
          case IX86_BUILTIN_FABSQ:
          case IX86_BUILTIN_COPYSIGNQ:
            if (!TARGET_SSE)
@@ -15644,7 +15762,7 @@ rdseed_step:
       && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
     {
       i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
-      return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
+      return ix86_expand_sse_comi (bdesc_comi + i, exp, target, true);
     }
 
   if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 34f9214115ea..b56a51be09fb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -139,6 +139,7 @@
   UNSPEC_SCALEF
   UNSPEC_PCMP
   UNSPEC_CVTBFSF
+  UNSPEC_COMX
 
   ;; Generic math support
   UNSPEC_IEEE_MIN      ; not commutative
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a6d844d00b4c..da91d39cf8eb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4692,6 +4692,22 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<ssescalarmode>")])
 
+(define_insn "avx10_2_<unord>comx<mode><round_saeonly_name>"
+  [(set (reg:CCFP FLAGS_REG)
+       (unspec:CCFP
+         [(vec_select:MODEFH
+            (match_operand:<ssevecmode> 0 "register_operand" "v")
+            (parallel [(const_int 0)]))
+          (vec_select:MODEFH
+            (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
+            (parallel [(const_int 0)]))]
+         UNSPEC_COMX))]
+  "TARGET_AVX10_2_256"
+  "v<unord>comx<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
+  [(set_attr "type" "ssecomi")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
   [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
@@ -4701,7 +4717,7 @@
          (vec_select:MODEFH
            (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
            (parallel [(const_int 0)]))))]
-  "SSE_FLOAT_MODE_P (<MODE>mode)"
+  "SSE_FLOAT_MODE_P (<MODE>mode) || <MODE>mode == E_HFmode"
  "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
   [(set_attr "type" "ssecomi")
    (set_attr "prefix" "maybe_vex")
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-compare-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-compare-1.c
new file mode 100644
index 000000000000..99d32186e6ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-compare-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vcomxsd\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1  } } */
+/* { dg-final { scan-assembler-times "vcomxss\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vucomxsd\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1  } } */
+/* { dg-final { scan-assembler-times "vucomxss\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1  } } */
+
+#include <immintrin.h>
+
+volatile __m128 x3;
+volatile __m128d x4;
+volatile int a;
+
+void extern
+avx10_2_test (void)
+{
+  a = _mm_comi_round_sd (x4, x4, _CMP_EQ_OS, _MM_FROUND_NO_EXC);
+  a = _mm_comi_round_ss (x3, x3, _CMP_NEQ_US, _MM_FROUND_NO_EXC);
+  a = _mm_comi_round_sd (x4, x4, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+  a = _mm_comi_round_ss (x3, x3, _CMP_NEQ_UQ, _MM_FROUND_NO_EXC);
+}

Reply via email to