https://gcc.gnu.org/g:2056d52d74070f50c5f8a22e4a600fcc3974fd88

commit r16-117-g2056d52d74070f50c5f8a22e4a600fcc3974fd88
Author: Jan Hubicka <hubi...@ucw.cz>
Date:   Thu Apr 24 18:37:55 2025 +0200

    Fix i386 vectorizer cost of COND_EXPR and MIN_MAX with one of parameters 0 or -1
    
    gcc/ChangeLog:
    
            PR target/119919
            * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Account
            correctly cond_expr and min/max when one of operands is 0 or -1.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr119919.c: New test.

Diff:
---
 gcc/config/i386/i386.cc                  | 43 ++++++++++++++++++++++++++------
 gcc/testsuite/gcc.target/i386/pr119919.c | 13 ++++++++++
 2 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3b4dfd9a9903..78df3d9525ae 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25375,14 +25375,32 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
        case COND_EXPR:
          {
            /* SSE2 conditinal move sequence is:
-                pcmpgtd %xmm5, %xmm0
+                pcmpgtd %xmm5, %xmm0 (accounted separately)
                 pand    %xmm0, %xmm2
                 pandn   %xmm1, %xmm0
                 por     %xmm2, %xmm0
               while SSE4 uses cmp + blend
-              and AVX512 masked moves.  */
-
-           int ninsns = TARGET_SSE4_1 ? 2 : 4;
+              and AVX512 masked moves.
+
+              The condition is accounted separately since we usually have
+                p = a < b
+                c = p ? x : y
+              and we will account first statement as setcc.  Exception is when
+              p is loaded from memory as bool and then we will not account
+              the compare, but there is no way to check for this.  */
+
+           int ninsns = TARGET_SSE4_1 ? 1 : 3;
+
+           /* If one of parameters is 0 or -1 the sequence will be simplified:
+              (if_true & mask) | (if_false & ~mask) -> if_true & mask  */
+           if (ninsns > 1
+               && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+                   || zerop (gimple_assign_rhs3 (stmt_info->stmt))
+                   || integer_minus_onep
+                       (gimple_assign_rhs2 (stmt_info->stmt))
+                   || integer_minus_onep
+                       (gimple_assign_rhs3 (stmt_info->stmt))))
+             ninsns = 1;
 
            if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
              stmt_cost = ninsns * ix86_cost->sse_op;
@@ -25393,8 +25411,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
            else if (VECTOR_MODE_P (mode))
              stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
            else
-             /* compare + cmov.  */
-             stmt_cost = ix86_cost->add * 2;
+             /* compare (accounted separately) + cmov.  */
+             stmt_cost = ix86_cost->add;
          }
          break;
 
@@ -25416,9 +25434,18 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
                {
                  stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
                  /* vpmin was introduced in SSE3.
-                    SSE2 needs pcmpgtd + pand + pandn + pxor.  */
+                    SSE2 needs pcmpgtd + pand + pandn + pxor.
+                    If one of parameters is 0 or -1 the sequence is simplified
+                    to pcmpgtd + pand.  */
                  if (!TARGET_SSSE3)
-                   stmt_cost *= 4;
+                   {
+                     if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+                         || integer_minus_onep
+                               (gimple_assign_rhs2 (stmt_info->stmt)))
+                       stmt_cost *= 2;
+                     else
+                       stmt_cost *= 4;
+                   }
                }
              else
                /* cmp + cmov.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr119919.c b/gcc/testsuite/gcc.target/i386/pr119919.c
new file mode 100644
index 000000000000..ed646561bd1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr119919.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -fdump-tree-vect-details" } */
+int a[9*9];
+bool b[9];
+void test()
+{
+        for (int i = 0; i < 9; i++)
+        {
+                b[i] = a[i*9] != 0;
+        }
+}
+
+/* { dg-final { scan-tree-dump "loop vectorized using 8 byte vectors" "vect" } } */

Reply via email to