https://gcc.gnu.org/g:0907a810f586b07636cc5b83dba6025eb5240655

commit r16-54-g0907a810f586b07636cc5b83dba6025eb5240655
Author: Jan Hubicka <hubi...@ucw.cz>
Date:   Mon Apr 21 20:16:50 2025 +0200

    Fix cost of vectorized double->float conversion
    
    In the previous patch I miscomputed the cost of the cvtpd2ps instruction,
    which mistakenly gets accounted as 2 (VEC_PACK_TRUNC_EXPR).
    The vectorizer can produce both, but when producing VEC_PACK_TRUNC_EXPR
    it uses the promote_demote path.  This patch thus simplifies the
    handling of NOP_EXPR, since in that case we should always be producing
    only one instruction.
    
            PR target/119879
            * config/i386/i386.cc (fp_conversion_stmt_cost): Inline to ...
            (ix86_vector_costs::add_stmt_cost): ... here; fix handling of
            NOP_EXPR.

Diff:
---
 gcc/config/i386/i386.cc | 51 +++++++++++++++++++++----------------------------
 1 file changed, 22 insertions(+), 29 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 28603c2943ee..d15f91ddd2cb 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25257,32 +25257,6 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
   return new ix86_vector_costs (vinfo, costing_for_scalar);
 }
 
-/* Return cost of statement doing FP conversion.  */
-
-static unsigned
-fp_conversion_stmt_cost (machine_mode mode, gimple *stmt, bool scalar_p)
-{
-  int outer_size
-    = tree_to_uhwi
-       (TYPE_SIZE
-           (TREE_TYPE (gimple_assign_lhs (stmt))));
-  int inner_size
-    = tree_to_uhwi
-       (TYPE_SIZE
-           (TREE_TYPE (gimple_assign_rhs1 (stmt))));
-  int stmt_cost = vec_fp_conversion_cost
-                   (ix86_tune_cost, GET_MODE_BITSIZE (mode));
-  /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
-     up doing two conversions and packing them.  */
-  if (!scalar_p && inner_size > outer_size)
-    {
-      int n = inner_size / outer_size;
-      stmt_cost = stmt_cost * n
-                 + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
-    }
-  return stmt_cost;
-}
-
 unsigned
 ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
                                  stmt_vec_info stmt_info, slp_tree node,
@@ -25394,8 +25368,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
                 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
            stmt_cost = 0;
          else if (fp)
-           stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt,
-                                                scalar_p);
+           stmt_cost = vec_fp_conversion_cost
+                         (ix86_tune_cost, GET_MODE_BITSIZE (mode));
          break;
 
        case BIT_IOR_EXPR:
@@ -25439,7 +25413,26 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 
   if (kind == vec_promote_demote
       && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
-    stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt, scalar_p);
+    {
+      int outer_size
+       = tree_to_uhwi
+           (TYPE_SIZE
+               (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
+      int inner_size
+       = tree_to_uhwi
+           (TYPE_SIZE
+               (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
+      int stmt_cost = vec_fp_conversion_cost
+                       (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+      /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
+        up doing two conversions and packing them.  */
+      if (inner_size > outer_size)
+       {
+         int n = inner_size / outer_size;
+         stmt_cost = stmt_cost * n
+                     + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
+       }
+    }
 
   /* If we do elementwise loads into a vector then we are bound by
      latency and execution resources for the many scalar loads

Reply via email to