Hi Uros and GCC:
  This patch is to fix ix86_expand_sse_comi_round whose implementation
was not correct.
  New implementation aligns with _mm_cmp_round_s[sd]_mask.

Bootstrap and regression tests for x86 are fine.
Ok for trunk?


ChangeLog:
gcc/
   * config/i386/i386-expand.c (ix86_expand_sse_comi_round):
   Rewrite to match the semantics of _mm_cmp_round_s[sd]_mask.

gcc/testsuite/
   * gcc.target/i386/avx512f-vcomisd-2.c: New test.
   * gcc.target/i386/avx512f-vcomiss-2.c: Likewise.

-- 
BR,
Hongtao
Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 270933)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,11 @@
+2019-05-06  H.J. Lu  <hongjiu...@intel.com>
+	    Hongtao Liu  <hongtao....@intel.com>
+
+	PR target/89750
+	PR target/86444
+	* config/i386/i386-expand.c (ix86_expand_sse_comi_round):
+	Rewrite to match the semantics of _mm_cmp_round_s[sd]_mask.
+
 2019-05-06  Segher Boessenkool  <seg...@kernel.crashing.org>
 
 	* config/rs6000/rs6000.md (FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO)
Index: gcc/config/i386/i386-expand.c
===================================================================
--- gcc/config/i386/i386-expand.c	(revision 270933)
+++ gcc/config/i386/i386-expand.c	(working copy)
@@ -9853,18 +9853,24 @@
   const struct insn_data_d *insn_p = &insn_data[icode];
   machine_mode mode0 = insn_p->operand[0].mode;
   machine_mode mode1 = insn_p->operand[1].mode;
-  enum rtx_code comparison = UNEQ;
-  bool need_ucomi = false;
 
   /* See avxintrin.h for values.  */
-  enum rtx_code comi_comparisons[32] =
+  static const enum rtx_code comparisons[32] =
     {
-      UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
-      UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
-      UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
+      EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED,
+      EQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED,
+      EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED,
+      EQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED
     };
-  bool need_ucomi_values[32] =
+  static const bool ordereds[32] =
     {
+      true,  true,  true,  false, false, false, false, true,
+      false, false, false, true,  true,  true,  true,  false,
+      true,  true,  true,  false, false, false, false, true,
+      false, false, false, true,  true,  true,  true,  false
+    };
+  static const bool non_signalings[32] =
+    {
       true,  false, false, true,  true,  false, false, true,
       true,  false, false, true,  true,  false, false, true,
       false, true,  true,  false, false, true,  true,  false,
@@ -9888,16 +9894,95 @@
       return const0_rtx;
     }
 
-  comparison = comi_comparisons[INTVAL (op2)];
-  need_ucomi = need_ucomi_values[INTVAL (op2)];
-
   if (VECTOR_MODE_P (mode0))
     op0 = safe_vector_operand (op0, mode0);
   if (VECTOR_MODE_P (mode1))
     op1 = safe_vector_operand (op1, mode1);
 
+  enum rtx_code comparison = comparisons[INTVAL (op2)];
+  bool ordered = ordereds[INTVAL (op2)];
+  bool non_signaling = non_signalings[INTVAL (op2)];
+  rtx const_val = const0_rtx;
+  
+  bool check_unordered = false;
+  machine_mode mode = CCFPmode;
+  switch (comparison)
+    {
+    case ORDERED:
+      if (!ordered)
+	{
+	  /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US.  */
+	  if (!non_signaling)
+	    ordered = true;
+	  mode = CCSmode;
+	}
+      else
+	{
+	  /* NB: Use CCPmode/NE for _CMP_ORD_Q/_CMP_ORD_S.  */
+	  if (non_signaling)
+	    ordered = false;
+	  mode = CCPmode;
+	}
+      comparison = NE;
+      break;
+    case UNORDERED:
+      if (ordered)
+	{
+	  /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS.  */
+	  if (non_signaling)
+	    ordered = false;
+	  mode = CCSmode;
+	}
+      else
+	{
+	  /* NB: Use CCPmode/NE for _CMP_UNORD_Q/_CMP_UNORD_S.  */
+	  if (!non_signaling)
+	    ordered = true;
+	  mode = CCPmode;
+	}
+      comparison = EQ;
+      break;
+
+    case LE:	/* -> GE  */
+    case LT:	/* -> GT  */
+    case UNGE:	/* -> UNLE  */
+    case UNGT:	/* -> UNLT  */
+      std::swap (op0, op1);
+      comparison = swap_condition (comparison);
+      /* FALLTHRU */
+    case GT:
+    case GE:
+    case UNEQ:
+    case UNLT:
+    case UNLE:
+    case LTGT:
+      /* These are supported by CCFPmode.  NB: Use ordered/signaling
+	 COMI or unordered/non-signaling UCOMI.  Both set ZF, PF, CF
+	 with NAN operands.  */
+      if (ordered == non_signaling)
+	ordered = !ordered;
+      break;
+    case EQ:
+      if (ordered)
+	check_unordered = true;
+      /* NB: COMI/UCOMI will set ZF with NAN operands.  Use CCmode for
+	 _CMP_EQ_OQ/_CMP_EQ_OS/_CMP_EQ_UQ/_CMP_EQ_US.  */
+      mode = CCmode;
+      break;
+    case NE:
+      gcc_assert (!ordered);
+      check_unordered = true;
+      /* NB: COMI/UCOMI will set ZF with NAN operands.  Use CCmode for
+	 _CMP_NEQ_UQ/_CMP_NEQ_US.  */
+      mode = CCmode;
+      const_val = const1_rtx;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
   target = gen_reg_rtx (SImode);
-  emit_move_insn (target, const0_rtx);
+  emit_move_insn (target, const_val);
   target = gen_rtx_SUBREG (QImode, target, 0);
 
   if ((optimize && !register_operand (op0, mode0))
@@ -9907,10 +9992,14 @@
       || !insn_p->operand[1].predicate (op1, mode1))
     op1 = copy_to_mode_reg (mode1, op1);
 
-  if (need_ucomi)
-    icode = icode == CODE_FOR_sse_comi_round
-		     ? CODE_FOR_sse_ucomi_round
-		     : CODE_FOR_sse2_ucomi_round;
+  /*
+     1. COMI: ordered and signaling.
+     2. UCOMI: unordered and non-signaling.
+   */
+  if (non_signaling)
+    icode = (icode == CODE_FOR_sse_comi_round
+	     ? CODE_FOR_sse_ucomi_round
+	     : CODE_FOR_sse2_ucomi_round);
 
   pat = GEN_FCN (icode) (op0, op1, op3);
   if (! pat)
@@ -9932,11 +10021,37 @@
     }
 
   emit_insn (pat);
+
+  rtx_code_label *label = NULL;
+
+  /* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient
+     with NAN operands.  */
+  if (check_unordered)
+    {
+      gcc_assert (comparison == EQ || comparison == NE);
+
+      rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG);
+      label = gen_label_rtx ();
+      rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx);
+      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+				  gen_rtx_LABEL_REF (VOIDmode, label),
+				  pc_rtx);
+      emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+    }
+
+  /* XXX: Set CCFPmode and check a different CCmode.  Does it work
+     correctly?  */
+  if (GET_MODE (set_dst) != mode)
+    set_dst = gen_rtx_REG (mode, REGNO (set_dst));
+
   emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
 			  gen_rtx_fmt_ee (comparison, QImode,
 					  set_dst,
 					  const0_rtx)));
 
+  if (label)
+    emit_label (label);
+
   return SUBREG_REG (target);
 }
 
Index: gcc/testsuite/ChangeLog
===================================================================
--- gcc/testsuite/ChangeLog	(revision 270933)
+++ gcc/testsuite/ChangeLog	(working copy)
@@ -1,3 +1,11 @@
+2019-05-06  H.J. Lu  <hongjiu...@intel.com>
+	    Hongtao Liu  <hongtao....@intel.com>
+
+	PR target/89750
+	PR target/86444
+	* gcc.target/i386/avx512f-vcomisd-2.c: New test.
+	* gcc.target/i386/avx512f-vcomiss-2.c: Likewise.
+
 2019-05-06  Steven G. Kargl  <ka...@gcc.gnu.org>
 
 	PR fortran/90290
Index: gcc/testsuite/gcc.target/i386/avx512f-vcomisd-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx512f-vcomisd-2.c	(nonexistent)
+++ gcc/testsuite/gcc.target/i386/avx512f-vcomisd-2.c	(working copy)
@@ -0,0 +1,104 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include "avx512f-check.h"
+
+static inline void __attribute__ ((__always_inline__))
+check_cmp (double s1, double s2, const int imm, int expected)
+{
+  __m128d source1 = _mm_load_sd (&s1);
+  __m128d source2 = _mm_load_sd (&s2);
+  int res = _mm_comi_round_sd (source1, source2, imm,
+			       _MM_FROUND_NO_EXC);
+  if (expected != res)
+    abort();
+}
+
+static void
+do_check (double s1, double s2)
+{
+  check_cmp (s1, s2, _CMP_EQ_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 == s2);
+  check_cmp (s1, s2, _CMP_LT_OS,
+	     !__builtin_isunordered (s1, s2) && s1 < s2);
+  check_cmp (s1, s2, _CMP_LE_OS,
+	     !__builtin_isunordered (s1, s2) && s1 <= s2);
+  check_cmp (s1, s2, _CMP_UNORD_Q,
+	     __builtin_isunordered (s1, s2));
+  check_cmp (s1, s2, _CMP_NEQ_UQ,
+	     __builtin_isunordered (s1, s2) || s1 != s2);
+  check_cmp (s1, s2, _CMP_NLT_US,
+	     __builtin_isunordered (s1, s2) || s1 >= s2);
+  check_cmp (s1, s2, _CMP_NLE_US,
+	     __builtin_isunordered (s1, s2) || s1 > s2);
+  check_cmp (s1, s2, _CMP_ORD_Q,
+	     !__builtin_isunordered (s1, s2));
+  check_cmp (s1, s2, _CMP_EQ_UQ,
+	     __builtin_isunordered (s1, s2) || s1 == s2);
+  check_cmp (s1, s2, _CMP_NGE_US,
+	     __builtin_isunordered (s1, s2) || s1 < s2);
+  check_cmp (s1, s2, _CMP_NGT_US,
+	     __builtin_isunordered (s1, s2) || s1 <= s2);
+  check_cmp (s1, s2, _CMP_FALSE_OQ, 0);
+  check_cmp (s1, s2, _CMP_NEQ_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 != s2);
+  check_cmp (s1, s2, _CMP_GE_OS,
+	     !__builtin_isunordered (s1, s2) && s1 >= s2);
+  check_cmp (s1, s2, _CMP_GT_OS,
+	     !__builtin_isunordered (s1, s2) && s1 > s2);
+  check_cmp (s1, s2, _CMP_TRUE_UQ, 1);
+  check_cmp (s1, s2, _CMP_EQ_OS,
+	     !__builtin_isunordered (s1, s2) && s1 == s2);
+  check_cmp (s1, s2, _CMP_LT_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 < s2);
+  check_cmp (s1, s2, _CMP_LE_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 <= s2);
+  check_cmp (s1, s2, _CMP_UNORD_S,
+	     __builtin_isunordered (s1, s2));
+  check_cmp (s1, s2, _CMP_NEQ_US,
+	     __builtin_isunordered (s1, s2) || s1 != s2);
+  check_cmp (s1, s2, _CMP_NLT_UQ,
+	     __builtin_isunordered (s1, s2) || s1 >= s2);
+  check_cmp (s1, s2, _CMP_NLE_UQ,
+	     __builtin_isunordered (s1, s2) || s1 > s2);
+  check_cmp (s1, s2, _CMP_ORD_S, !__builtin_isunordered (s1, s2));
+  check_cmp (s1, s2, _CMP_EQ_US,
+	     __builtin_isunordered (s1, s2) || s1 == s2);
+  check_cmp (s1, s2, _CMP_NGE_UQ,
+	     __builtin_isunordered (s1, s2) || s1 < s2);
+  check_cmp (s1, s2, _CMP_NGT_UQ,
+	     __builtin_isunordered (s1, s2) || s1 <= s2);
+  check_cmp (s1, s2, _CMP_FALSE_OS, 0);
+  check_cmp (s1, s2, _CMP_NEQ_OS,
+	     !__builtin_isunordered (s1, s2) && s1 != s2);
+  check_cmp (s1, s2, _CMP_GE_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 >= s2);
+  check_cmp (s1, s2, _CMP_GT_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 > s2);
+  check_cmp (s1, s2, _CMP_TRUE_US, 1);
+}
+
+static void
+avx512f_test (void)
+{
+  struct
+    {
+      double x1;
+      double x2;
+    }
+  inputs[] =
+    { 
+      { 4.3, 2.18 },
+      { -4.3, 3.18 },
+      { __builtin_nan (""), -5.8 },
+      { -4.8, __builtin_nans ("") },
+      { 3.8, __builtin_nans ("") },
+      { 4.2, 4.2 },
+      { __builtin_nan (""), __builtin_nans ("") },
+    };
+  int i;
+
+  for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
+    do_check (inputs[i].x1, inputs[i].x2);
+}
Index: gcc/testsuite/gcc.target/i386/avx512f-vcomiss-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/avx512f-vcomiss-2.c	(nonexistent)
+++ gcc/testsuite/gcc.target/i386/avx512f-vcomiss-2.c	(working copy)
@@ -0,0 +1,104 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include "avx512f-check.h"
+
+static inline void __attribute__ ((__always_inline__))
+check_cmp (float s1, float s2, const int imm, int expected)
+{
+  __m128 source1 = _mm_load_ss (&s1);
+  __m128 source2 = _mm_load_ss (&s2);
+  int res = _mm_comi_round_ss (source1, source2, imm,
+			       _MM_FROUND_NO_EXC);
+  if (expected != res)
+    abort();
+}
+
+static void
+do_check (float s1, float s2)
+{
+  check_cmp (s1, s2, _CMP_EQ_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 == s2);
+  check_cmp (s1, s2, _CMP_LT_OS,
+	     !__builtin_isunordered (s1, s2) && s1 < s2);
+  check_cmp (s1, s2, _CMP_LE_OS,
+	     !__builtin_isunordered (s1, s2) && s1 <= s2);
+  check_cmp (s1, s2, _CMP_UNORD_Q,
+	     __builtin_isunordered (s1, s2));
+  check_cmp (s1, s2, _CMP_NEQ_UQ,
+	     __builtin_isunordered (s1, s2) || s1 != s2);
+  check_cmp (s1, s2, _CMP_NLT_US,
+	     __builtin_isunordered (s1, s2) || s1 >= s2);
+  check_cmp (s1, s2, _CMP_NLE_US,
+	     __builtin_isunordered (s1, s2) || s1 > s2);
+  check_cmp (s1, s2, _CMP_ORD_Q,
+	     !__builtin_isunordered (s1, s2));
+  check_cmp (s1, s2, _CMP_EQ_UQ,
+	     __builtin_isunordered (s1, s2) || s1 == s2);
+  check_cmp (s1, s2, _CMP_NGE_US,
+	     __builtin_isunordered (s1, s2) || s1 < s2);
+  check_cmp (s1, s2, _CMP_NGT_US,
+	     __builtin_isunordered (s1, s2) || s1 <= s2);
+  check_cmp (s1, s2, _CMP_FALSE_OQ, 0);
+  check_cmp (s1, s2, _CMP_NEQ_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 != s2);
+  check_cmp (s1, s2, _CMP_GE_OS,
+	     !__builtin_isunordered (s1, s2) && s1 >= s2);
+  check_cmp (s1, s2, _CMP_GT_OS,
+	     !__builtin_isunordered (s1, s2) && s1 > s2);
+  check_cmp (s1, s2, _CMP_TRUE_UQ, 1);
+  check_cmp (s1, s2, _CMP_EQ_OS,
+	     !__builtin_isunordered (s1, s2) && s1 == s2);
+  check_cmp (s1, s2, _CMP_LT_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 < s2);
+  check_cmp (s1, s2, _CMP_LE_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 <= s2);
+  check_cmp (s1, s2, _CMP_UNORD_S,
+	     __builtin_isunordered (s1, s2));
+  check_cmp (s1, s2, _CMP_NEQ_US,
+	     __builtin_isunordered (s1, s2) || s1 != s2);
+  check_cmp (s1, s2, _CMP_NLT_UQ,
+	     __builtin_isunordered (s1, s2) || s1 >= s2);
+  check_cmp (s1, s2, _CMP_NLE_UQ,
+	     __builtin_isunordered (s1, s2) || s1 > s2);
+  check_cmp (s1, s2, _CMP_ORD_S, !__builtin_isunordered (s1, s2));
+  check_cmp (s1, s2, _CMP_EQ_US,
+	     __builtin_isunordered (s1, s2) || s1 == s2);
+  check_cmp (s1, s2, _CMP_NGE_UQ,
+	     __builtin_isunordered (s1, s2) || s1 < s2);
+  check_cmp (s1, s2, _CMP_NGT_UQ,
+	     __builtin_isunordered (s1, s2) || s1 <= s2);
+  check_cmp (s1, s2, _CMP_FALSE_OS, 0);
+  check_cmp (s1, s2, _CMP_NEQ_OS,
+	     !__builtin_isunordered (s1, s2) && s1 != s2);
+  check_cmp (s1, s2, _CMP_GE_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 >= s2);
+  check_cmp (s1, s2, _CMP_GT_OQ,
+	     !__builtin_isunordered (s1, s2) && s1 > s2);
+  check_cmp (s1, s2, _CMP_TRUE_US, 1);
+}
+
+static void
+avx512f_test (void)
+{
+  struct
+    {
+      float x1;
+      float x2;
+    }
+  inputs[] =
+    { 
+      { 4.3, 2.18 },
+      { -4.3, 3.18 },
+      { __builtin_nanf (""), -5.8 },
+      { -4.8, __builtin_nansf ("") },
+      { 3.8, __builtin_nansf ("") },
+      { 4.2, 4.2 },
+      { __builtin_nanf (""), __builtin_nansf ("") },
+    };
+  int i;
+
+  for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
+    do_check (inputs[i].x1, inputs[i].x2);
+}

Reply via email to