On Fri, Apr 17, 2020 at 02:12:19PM +0200, Uros Bizjak wrote:
> Is it possible to perform widening only for !TARGET_PARTIAL_REG_STALL?
> I'm worried that highpart may not be cleared, so the effects of
> partial reg stall can be quite noticeable.

So, like this then?
All I've added was another condition, interdiff:
diff -u gcc/config/i386/i386.md gcc/config/i386/i386.md
--- gcc/config/i386/i386.md     2020-04-16 14:21:41.686972979 +0200
+++ gcc/config/i386/i386.md     2020-04-17 14:29:33.882690098 +0200
@@ -8750,6 +8750,18 @@
                             == GET_MODE_PRECISION (GET_MODE (operands[2]))
                             && !register_operand (operands[2],
                                                   GET_MODE (operands[2])))
+                        /* And we shouldn't widen if
+                           TARGET_PARTIAL_REG_STALL.  */
+                        || (TARGET_PARTIAL_REG_STALL
+                            && (INTVAL (operands[3]) + INTVAL (operands[4])
+                                >= (paradoxical_subreg_p (operands[2])
+                                    && (GET_MODE_CLASS
+                                         (GET_MODE (SUBREG_REG (operands[2])))
+                                        == MODE_INT)
+                                    ? GET_MODE_PRECISION
+                                        (GET_MODE (SUBREG_REG (operands[2])))
+                                    : GET_MODE_PRECISION
+                                        (GET_MODE (operands[2])))))
                         ? CCZmode : CCNOmode)"
   "#"
   "&& 1"
and nothing in the splitter would need to change because the condition
would ensure that for TARGET_PARTIAL_REG_STALL we require in all problematic
cases CCZmode.  The only exception would be the pos + len == 8
optimization which we wouldn't perform (of course with CCZmode we'd still
do), but that isn't a partial reg stall thing, that is an optimization to
use shorter testb instead of testw.

2020-04-17  Jakub Jelinek  <ja...@redhat.com>
            Jeff Law  <l...@redhat.com>

        PR target/94567
        * config/i386/i386.md (*testqi_ext_3): Use CCZmode rather than
        CCNOmode in ix86_match_ccmode if len is equal to <MODE>mode precision,
        or pos + len >= 32, or pos + len is equal to operands[2] precision
        and operands[2] is not a register operand.  During splitting perform
        SImode AND if operands[0] doesn't have CCZmode and pos + len is
        equal to mode precision.

        * gcc.c-torture/execute/pr94567.c: New test.

--- gcc/config/i386/i386.md.jj  2020-04-17 08:49:33.236921107 +0200
+++ gcc/config/i386/i386.md     2020-04-17 14:29:33.882690098 +0200
@@ -8730,10 +8730,38 @@ (define_insn_and_split "*testqi_ext_3"
           && INTVAL (operands[3]) > 32
           && INTVAL (operands[3]) + INTVAL (operands[4]) == 64))
    && ix86_match_ccmode (insn,
-                        /* *testdi_1 requires CCZmode if the mask has bit
+                        /* If zero_extract mode precision is the same
+                           as len, the SF of the zero_extract
+                           comparison will be the most significant
+                           extracted bit, but this could be matched
+                           after splitting only for pos 0 len all bits
+                           trivial extractions.  Require CCZmode.  */
+                        (GET_MODE_PRECISION (<MODE>mode)
+                         == INTVAL (operands[3]))
+                        /* Otherwise, require CCZmode if we'd use a mask
+                           with the most significant bit set and can't
+                           widen it to wider mode.  *testdi_1 also
+                           requires CCZmode if the mask has bit
                            31 set and all bits above it clear.  */
-                        GET_MODE (operands[2]) == DImode
-                        && INTVAL (operands[3]) + INTVAL (operands[4]) == 32
+                        || (INTVAL (operands[3]) + INTVAL (operands[4])
+                            >= 32)
+                        /* We can't widen also if val is not a REG.  */
+                        || (INTVAL (operands[3]) + INTVAL (operands[4])
+                            == GET_MODE_PRECISION (GET_MODE (operands[2]))
+                            && !register_operand (operands[2],
+                                                  GET_MODE (operands[2])))
+                        /* And we shouldn't widen if
+                           TARGET_PARTIAL_REG_STALL.  */
+                        || (TARGET_PARTIAL_REG_STALL
+                            && (INTVAL (operands[3]) + INTVAL (operands[4])
+                                >= (paradoxical_subreg_p (operands[2])
+                                    && (GET_MODE_CLASS
+                                         (GET_MODE (SUBREG_REG (operands[2])))
+                                        == MODE_INT)
+                                    ? GET_MODE_PRECISION
+                                        (GET_MODE (SUBREG_REG (operands[2])))
+                                    : GET_MODE_PRECISION
+                                        (GET_MODE (operands[2])))))
                         ? CCZmode : CCNOmode)"
   "#"
   "&& 1"
@@ -8750,7 +8778,10 @@ (define_insn_and_split "*testqi_ext_3"
 
       /* Narrow paradoxical subregs to prevent partial register stalls.  */
       if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)
-         && GET_MODE_CLASS (submode) == MODE_INT)
+         && GET_MODE_CLASS (submode) == MODE_INT
+         && (GET_MODE (operands[0]) == CCZmode
+             || pos + len < GET_MODE_PRECISION (submode)
+             || REG_P (SUBREG_REG (val))))
        {
          val = SUBREG_REG (val);
          mode = submode;
@@ -8758,14 +8789,32 @@ (define_insn_and_split "*testqi_ext_3"
     }
 
   /* Small HImode tests can be converted to QImode.  */
-  if (register_operand (val, HImode) && pos + len <= 8)
+  if (pos + len <= 8
+      && register_operand (val, HImode))
     {
-      val = gen_lowpart (QImode, val);
-      mode = QImode;
+      rtx nval = gen_lowpart (QImode, val);
+      if (!MEM_P (nval)
+         || GET_MODE (operands[0]) == CCZmode
+         || pos + len < 8)
+       {
+         val = nval;
+         mode = QImode;
+       }
     }
 
   gcc_assert (pos + len <= GET_MODE_PRECISION (mode));
 
+  /* If the mask is going to have the sign bit set in the mode
+     we want to do the comparison in and user isn't interested just
+     in the zero flag, then we must widen the target mode.  */
+  if (pos + len == GET_MODE_PRECISION (mode)
+      && GET_MODE (operands[0]) != CCZmode)
+    {
+      gcc_assert (pos + len < 32 && !MEM_P (val));
+      mode = SImode;
+      val = gen_lowpart (mode, val);
+    }
+
   wide_int mask
     = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode));
 
--- gcc/testsuite/gcc.c-torture/execute/pr94567.c.jj    2020-04-17 
14:19:34.610683856 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr94567.c       2020-04-17 
14:19:34.610683856 +0200
@@ -0,0 +1,26 @@
+/* PR target/94567 */
+
+volatile int a = 1, b;
+short c, d = 4, f = 2, g;
+unsigned short e = 53736;
+
+int
+foo (int i, int j)
+{
+  return i && j ? 0 : i + j;
+}
+
+int
+main ()
+{
+  for (; a; a = 0)
+    {
+      unsigned short k = e;
+      g = k >> 3;
+      if (foo (g < (f || c), b))
+       d = 0;
+    }
+  if (d != 4)
+    __builtin_abort ();
+  return 0;
+}


        Jakub

Reply via email to