Hi,
This patch adds pattern matching for float<->int conversions, both as normal
statements and as promote_demote.  While updating promote_demote I noticed that
in an earlier cleanup I had turned "stmt_cost =" into "int stmt_cost =", which
turned the existing FP costing into a no-op.  I also added a comment on how
demotes are done when turning e.g. a 32-bit value into an 8-bit one (which is
the case in pr119919.c).

The patch disables vectorization in pr119919.c with generic tuning, but keeps
it for both zen and skylake+.  The underlying problem is the bad cost of the
open-coded scatter, which is tracked by PR119902, so I simply added
-mtune=znver1 so that the testcase keeps testing vectorization.

Bootstrapped/regtested x86_64-linux, committed.

gcc/ChangeLog:

        * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Add
        FLOAT_EXPR, FIX_TRUNC_EXPR and vec_promote_demote costs.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr119919.c: Add -mtune=znver1.

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index bef95ea18c8..fd36ea802c0 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25767,6 +25767,26 @@ ix86_vector_costs::add_stmt_cost (int count, 
vect_cost_for_stmt kind,
                          (ix86_tune_cost, GET_MODE_BITSIZE (mode));
          break;
 
+       case FLOAT_EXPR:
+           if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+             stmt_cost = ix86_cost->cvtsi2ss;
+           else if (X87_FLOAT_MODE_P (mode))
+             /* TODO: We do not have cost tables for x87.  */
+             stmt_cost = ix86_cost->fadd;
+           else
+             stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+           break;
+
+       case FIX_TRUNC_EXPR:
+           if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+             stmt_cost = ix86_cost->cvtss2si;
+           else if (X87_FLOAT_MODE_P (mode))
+             /* TODO: We do not have cost tables for x87.  */
+             stmt_cost = ix86_cost->fadd;
+           else
+             stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+           break;
+
        case COND_EXPR:
          {
            /* SSE2 conditinal move sequence is:
@@ -25930,8 +25950,7 @@ ix86_vector_costs::add_stmt_cost (int count, 
vect_cost_for_stmt kind,
        break;
       }
 
-  if (kind == vec_promote_demote
-      && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
+  if (kind == vec_promote_demote)
     {
       int outer_size
        = tree_to_uhwi
@@ -25941,16 +25960,25 @@ ix86_vector_costs::add_stmt_cost (int count, 
vect_cost_for_stmt kind,
        = tree_to_uhwi
            (TYPE_SIZE
                (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
-      int stmt_cost = vec_fp_conversion_cost
-                       (ix86_tune_cost, GET_MODE_BITSIZE (mode));
-      /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will 
end
-        up doing two conversions and packing them.  */
+      bool inner_fp = FLOAT_TYPE_P
+                       (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
+
+      if (fp && inner_fp)
+       stmt_cost = vec_fp_conversion_cost
+                         (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+      else if (fp && !inner_fp)
+       stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+      else if (!fp && inner_fp)
+       stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+      else
+       stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+      /* VEC_PACK_TRUNC_EXPR and similar demote operations: If inner size is
+        greater than outer size we will end up doing two conversions and
+        packing them.  We always pack pairs; if the size difference is greater
+        it is split into multiple demote operations.  */
       if (inner_size > outer_size)
-       {
-         int n = inner_size / outer_size;
-         stmt_cost = stmt_cost * n
-                     + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
-       }
+       stmt_cost = stmt_cost * 2
+                   + ix86_vec_cost (mode, ix86_cost->sse_op);
     }
 
   /* If we do elementwise loads into a vector then we are bound by
diff --git a/gcc/testsuite/gcc.target/i386/pr119919.c 
b/gcc/testsuite/gcc.target/i386/pr119919.c
index ed646561bd1..e39819f682d 100644
--- a/gcc/testsuite/gcc.target/i386/pr119919.c
+++ b/gcc/testsuite/gcc.target/i386/pr119919.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -msse2 -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -msse2 -fdump-tree-vect-details -mtune=znver1" } */
 int a[9*9];
 bool b[9];
 void test()

Reply via email to