https://gcc.gnu.org/g:c85148d036d17295bb2560e10020c924c83a5d13

commit r16-358-gc85148d036d17295bb2560e10020c924c83a5d13
Author: Jan Hubicka <hubi...@ucw.cz>
Date:   Fri May 2 15:53:35 2025 +0200

    Make ix86 cost of VEC_SELECT equivalent to SUBREG cost 1
    
    This patch fixes a regression of imagick with PGO and AVX512 where
    correcting the size cost of SSE operations (to be 4 instead of 2, originally
    cut&pasted from x87) made late combine eliminate the zero registers
    introduced by rpad.  The problem is that the cost model mistakenly accounts
    VEC_SELECT as a real instruction while it is optimized to nothing if
    src==dest (which is the case in these testcases).  This register is used to
    eliminate the false dependency between source and destination of int->fp
    conversions.
    
    While the ix86_insn_cost hook already contains logic to increase the cost
    of the zero-extend, the cost was not enough.
    
    gcc/ChangeLog:
    
            PR target/119900
            * config/i386/i386.cc (ix86_can_change_mode_class): Add TODO
            comment.
            (ix86_rtx_costs): Make VEC_SELECT equivalent to SUBREG cost 1.

Diff:
---
 gcc/config/i386/i386.cc | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index cb348cb9cfb8..0c808c22b4f0 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20978,7 +20978,11 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
     return true;
 
   /* x87 registers can't do subreg at all, as all values are reformatted
-     to extended precision.  */
+     to extended precision.
+
+     ??? middle-end queries mode changes for ALL_REGS and this makes
+     vec_series_lowpart_p always return false.  We probably should
+     restrict this to modes supported by i387 and check if it is enabled.  */
   if (MAYBE_FLOAT_CLASS_P (regclass))
     return false;
 
@@ -22756,13 +22760,41 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
        }
       return false;
 
-    case VEC_SELECT:
     case VEC_CONCAT:
       /* ??? Assume all of these vector manipulation patterns are
         recognizable.  In which case they all pretty much have the
-        same cost.  */
+        same cost.
+        ??? We should still recurse when computing cost.  */
      *total = cost->sse_op;
      return true;
+
+    case VEC_SELECT:
+     /* Special case extracting lower part from the vector.
+       This by itself needs no code and most of SSE/AVX instructions have
+       packed and single forms where the single form may be represented
+       by such VEC_SELECT.
+
+       Use cost 1 (despite the fact that functionally equivalent SUBREG has
+       cost 0).  Making VEC_SELECT completely free, for example instructs CSE
+       to forward propagate VEC_SELECT into
+
+          (set (reg eax) (reg src))
+
+       which then prevents fwprop and combining.  See e.g.
+       gcc.target/i386/pr91103-1.c.
+
+       ??? rtvec_series_p test should be, for valid patterns, equivalent to
+       vec_series_lowpart_p but is not, since the latter calls
+       can_change_mode_class on ALL_REGS and this returns false since x87 does
+       not support subregs at all.  */
+     if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
+       *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
+                         outer_code, opno, speed) + 1;
+     else
+       /* ??? We should still recurse when computing cost.  */
+       *total = cost->sse_op;
+     return true;
+
     case VEC_DUPLICATE:
       *total = rtx_cost (XEXP (x, 0),
                         GET_MODE (XEXP (x, 0)),
@@ -22780,6 +22812,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
       if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
        *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
       else
+       /* ??? We should still recurse when computing cost.  */
        *total = cost->sse_op;
       return true;

Reply via email to