Hi Uros,
Very many thanks for the speedy review and approval of my ix86_rtx_costs
patch to correct the cost of multi-word multiplications.  In the same
spirit, this patch tidies up a few additional nits I noticed while there:
Multi-word NOT requires two operations, but multi-word NEG requires
three operations.  Using SSE, vector NOT requires a pxor with -1, but
AND of NOT is cheap thanks to the existence of pandn.  There's also some
legacy (aka incorrect) logic explicitly testing for DImode [independently
of TARGET_64BIT] in determining the cost of logic operations that's not
required.

There should be no behavioural changes from this patch (as confirmed by
testing) but hopefully vectorization and other middle-end passes can now
rely on more sensible "cost" estimates.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}, with
no new failures.  Ok for mainline?


2022-05-18  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386.cc (ix86_rtx_costs): Split AND from XOR/IOR.
        Multi-word binary logic operations require two instructions.
        For vector integer modes, AND with a NOT operand requires only
        a single instruction (pandn).
        [NOT]: Vector integer NOT requires more than 1 instruction (pxor).
        [NEG]: Multi-word negation requires 3 instructions.


Thanks in advance,
Roger
--

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 86752a6..1701244 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20744,26 +20744,67 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
        }
       /* FALLTHRU */
 
-    case AND:
     case IOR:
     case XOR:
       if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        {
-         *total = (cost->add * 2
-                   + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
-                      << (GET_MODE (XEXP (x, 0)) != DImode))
-                   + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
-                      << (GET_MODE (XEXP (x, 1)) != DImode)));
+         *total = cost->add * 2
+                  + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
+                  + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+         return true;
+       }
+      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+       *total = ix86_vec_cost (mode, cost->sse_op);
+      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+       *total = cost->add * 2;
+      else
+       *total = cost->add;
+      return false;
+
+    case AND:
+      if (GET_MODE_CLASS (mode) == MODE_INT
+         && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+       {
+         *total = cost->add * 2
+                  + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
+                  + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
          return true;
        }
-      else if (code == AND
-              && address_no_seg_operand (x, mode))
+      else if (address_no_seg_operand (x, mode))
        {
          *total = cost->lea;
          return true;
        }
-      /* FALLTHRU */
+      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+       {
+         /* pandn is a single instruction.  */
+         if (GET_CODE (XEXP (x, 0)) == NOT)
+           {
+             *total = ix86_vec_cost (mode, cost->sse_op)
+                      + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
+                                  outer_code, opno, speed)
+                      + rtx_cost (XEXP (x, 1), mode,
+                                  outer_code, opno, speed);
+             return true;
+           }
+         *total = ix86_vec_cost (mode, cost->sse_op);
+       }
+      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+       *total = cost->add * 2;
+      else
+       *total = cost->add;
+      return false;
+
+    case NOT:
+      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+       // vnot is pxor -1.
+       *total = ix86_vec_cost (mode, cost->sse_op) + 1;
+      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+       *total = cost->add * 2;
+      else
+       *total = cost->add;
+      return false;
 
     case NEG:
       if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
@@ -20781,13 +20822,10 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
          *total = ix86_vec_cost (mode, cost->sse_op);
          return false;
        }
-      /* FALLTHRU */
-
-    case NOT:
-      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        *total = ix86_vec_cost (mode, cost->sse_op);
       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
-       *total = cost->add * 2;
+       *total = cost->add * 3;
       else
        *total = cost->add;
       return false;

Reply via email to