Hi!

Kai has reported that his type demotion patches lead to a regression, which
can also be seen without his patches by doing the type demotion by hand.
test1 is optimized using the *jcc_bt<mode>_mask instruction (the combiner
detects this), but test2 isn't.  In that case the combiner first merges the
AND with the shift into a *<shift_insn><mode>3_mask insn, and
*jcc_bt<mode>_mask won't match, because we end up with
(zero_extend:SI (subreg:QI (and:SI <something> (const_int 63)) 0))
and we don't simplify that.

So, my first approach was to try to simplify that: because nonzero_bits
on the subreg operand says that no bits outside of QImode may be non-zero,
both the zero_extend and the subreg can be dropped.  That is the
simplify-rtx.c change.  Then I figured out that combine.c doesn't actually
attempt to simplify this anyway, so that is the combine.c change.  And lastly
an i386 pattern was needed anyway.  I've also attempted to simplify:
(zero_extend:SI (subreg:QI (and:DI <something> (const_int 63)) 0))
into
(subreg:SI (and:DI <something> (const_int 63)) 0)
(a very small change in simplify-rtx.c: just drop the requirement that the
zero_extend mode is as wide as or wider than SUBREG_REG's mode, and when it
is <=, use gen_lowpart_no_emit instead of just returning the SUBREG_REG),
but that unfortunately regressed the test1 case; we'd need some further
i386.md tweaks.
While in theory this folding looks like a useful simplification, the test1
regression makes me wonder whether other backends rely on such expressions
actually not being simplified.

So, as an alternative, I've also implemented an i386.md-only fix.

Thus, do we want the first patch, or first patch + also the
above described further simplify-rtx.c change + some further i386.md tweaks,
or just the second patch instead?

Both have been bootstrapped/regtested on x86_64-linux and i686-linux.

        Jakub
2013-07-05  Jakub Jelinek  <ja...@redhat.com>

        PR target/57819
        * simplify-rtx.c (simplify_unary_operation_1) <case ZERO_EXTEND>:
        Simplify (zero_extend:SI (subreg:QI (and:SI (reg:SI)
        (const_int 63)) 0)).
        * combine.c (make_extraction): Create ZERO_EXTEND or SIGN_EXTEND
        using simplify_gen_unary instead of gen_rtx_*_EXTEND.
        * config/i386/i386.md (*jcc_bt<mode>_1): New define_insn_and_split.

        * gcc.target/i386/pr57819.c: New test.

--- gcc/simplify-rtx.c.jj       2013-06-01 14:47:23.000000000 +0200
+++ gcc/simplify-rtx.c  2013-07-04 16:24:48.654817120 +0200
@@ -1470,6 +1470,29 @@ simplify_unary_operation_1 (enum rtx_cod
            }
        }
 
+      /* (zero_extend:M (subreg:N <X:O>)) is <X:O> (for M == O) or
+        (zero_extend:M <X:O>), if X doesn't have any bits outside of N mode
+        non-zero.  E.g.
+        (zero_extend:SI (subreg:QI (and:SI (reg:SI) (const_int 63)) 0)) is
+        (and:SI (reg:SI) (const_int 63)).  */
+      if (GET_CODE (op) == SUBREG
+         && GET_MODE_PRECISION (GET_MODE (op))
+            < GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op)))
+         && GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op)))
+            <= HOST_BITS_PER_WIDE_INT
+         && GET_MODE_PRECISION (mode)
+            >= GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op)))
+         && subreg_lowpart_p (op)
+         && (nonzero_bits (SUBREG_REG (op), GET_MODE (SUBREG_REG (op)))
+             & ~GET_MODE_MASK (GET_MODE (op))) == 0)
+       {
+         if (GET_MODE_PRECISION (mode)
+             == GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op))))
+           return SUBREG_REG (op);
+         return simplify_gen_unary (ZERO_EXTEND, mode, SUBREG_REG (op),
+                                    GET_MODE (SUBREG_REG (op)));
+       }
+
 #if defined(POINTERS_EXTEND_UNSIGNED) && !defined(HAVE_ptr_extend)
       /* As we do not know which address space the pointer is referring to,
         we can do this only if the target does not support different pointer
--- gcc/combine.c.jj    2013-05-04 14:40:40.000000000 +0200
+++ gcc/combine.c       2013-07-04 15:44:59.409575170 +0200
@@ -7326,7 +7326,8 @@ make_extraction (enum machine_mode mode,
   if (pos_rtx != 0
       && GET_MODE_SIZE (pos_mode) > GET_MODE_SIZE (GET_MODE (pos_rtx)))
     {
-      rtx temp = gen_rtx_ZERO_EXTEND (pos_mode, pos_rtx);
+      rtx temp = simplify_gen_unary (ZERO_EXTEND, pos_mode, pos_rtx,
+                                    GET_MODE (pos_rtx));
 
       /* If we know that no extraneous bits are set, and that the high
         bit is not set, convert extraction to cheaper one - either
@@ -7340,7 +7341,8 @@ make_extraction (enum machine_mode mode,
                       >> 1))
                  == 0)))
        {
-         rtx temp1 = gen_rtx_SIGN_EXTEND (pos_mode, pos_rtx);
+         rtx temp1 = simplify_gen_unary (SIGN_EXTEND, pos_mode, pos_rtx,
+                                         GET_MODE (pos_rtx));
 
          /* Prefer ZERO_EXTENSION, since it gives more information to
             backends.  */
--- gcc/config/i386/i386.md.jj  2013-06-27 18:47:32.000000000 +0200
+++ gcc/config/i386/i386.md     2013-07-04 15:58:24.429243358 +0200
@@ -10474,6 +10474,39 @@ (define_insn_and_split "*jcc_bt<mode>"
   PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
 })
 
+;; Like *jcc_bt<mode>, but expect a SImode operand 2 instead of QImode
+;; zero extended to SImode.
+(define_insn_and_split "*jcc_bt<mode>_1"
+  [(set (pc)
+       (if_then_else (match_operator 0 "bt_comparison_operator"
+                       [(zero_extract:SWI48
+                          (match_operand:SWI48 1 "register_operand" "r")
+                          (const_int 1)
+                          (match_operand:SI 2 "register_operand" "r"))
+                        (const_int 0)])
+                     (label_ref (match_operand 3))
+                     (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+         (zero_extract:SWI48
+           (match_dup 1)
+           (const_int 1)
+           (match_dup 2))
+         (const_int 0)))
+   (set (pc)
+       (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+                     (label_ref (match_dup 3))
+                     (pc)))]
+{
+  operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
 ;; Avoid useless masking of bit offset operand.  "and" in SImode is correct
 ;; also for DImode, this is what combine produces.
 (define_insn_and_split "*jcc_bt<mode>_mask"
--- gcc/testsuite/gcc.target/i386/pr57819.c.jj  2013-07-04 16:27:46.900877301 +0200
+++ gcc/testsuite/gcc.target/i386/pr57819.c     2013-07-04 16:27:30.000000000 +0200
@@ -0,0 +1,38 @@
+/* PR target/57819 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=core2" } */
+
+void foo (void);
+
+__extension__ typedef __INTPTR_TYPE__ intptr_t;
+
+int
+test1 (intptr_t x, intptr_t n)
+{
+  n &= sizeof (intptr_t) * __CHAR_BIT__ - 1;
+
+  if (x & ((intptr_t) 1 << n))
+    foo ();
+
+  return 0;
+}
+
+int
+test2 (intptr_t x, intptr_t n)
+{
+  if (x & ((intptr_t) 1 << ((int) n & (sizeof (intptr_t) * __CHAR_BIT__ - 1))))
+    foo ();
+
+  return 0;
+}
+
+int
+test3 (intptr_t x, intptr_t n)
+{
+  if (x & ((intptr_t) 1 << ((int) n & ((int) sizeof (intptr_t) * __CHAR_BIT__ - 1))))
+    foo ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]\[ \t\]" } } */
2013-07-05  Jakub Jelinek  <ja...@redhat.com>

        PR target/57819
        * config/i386/i386.md (*jcc_bt<mode>_mask_1): New
        define_insn_and_split.

        * gcc.target/i386/pr57819.c: New test.

--- gcc/config/i386/i386.md.jj  2013-06-27 18:47:32.000000000 +0200
+++ gcc/config/i386/i386.md     2013-07-04 16:54:48.789218553 +0200
@@ -10510,6 +10510,45 @@ (define_insn_and_split "*jcc_bt<mode>_ma
   PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
 })
 
+;; Like *jcc_bt<mode>_mask, but for the case where AND has been previously
+;; combined with a shift.
+(define_insn_and_split "*jcc_bt<mode>_mask_1"
+  [(set (pc)
+       (if_then_else (match_operator 0 "bt_comparison_operator"
+                       [(zero_extract:SWI48
+                          (match_operand:SWI48 1 "register_operand" "r")
+                          (const_int 1)
+                          (zero_extend:SI
+                            (subreg:QI
+                              (and:SI
+                                (match_operand:SI 2 "register_operand" "r")
+                                (match_operand:SI 3 "const_int_operand" "n"))
+                              0)))])
+                     (label_ref (match_operand 4))
+                     (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+         (zero_extract:SWI48
+           (match_dup 1)
+           (const_int 1)
+           (match_dup 2))
+         (const_int 0)))
+   (set (pc)
+       (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+                     (label_ref (match_dup 4))
+                     (pc)))]
+{
+  operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
 (define_insn_and_split "*jcc_btsi_1"
   [(set (pc)
        (if_then_else (match_operator 0 "bt_comparison_operator"
--- gcc/testsuite/gcc.target/i386/pr57819.c.jj  2013-07-04 16:27:46.900877301 +0200
+++ gcc/testsuite/gcc.target/i386/pr57819.c     2013-07-04 16:27:30.000000000 +0200
@@ -0,0 +1,38 @@
+/* PR target/57819 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=core2" } */
+
+void foo (void);
+
+__extension__ typedef __INTPTR_TYPE__ intptr_t;
+
+int
+test1 (intptr_t x, intptr_t n)
+{
+  n &= sizeof (intptr_t) * __CHAR_BIT__ - 1;
+
+  if (x & ((intptr_t) 1 << n))
+    foo ();
+
+  return 0;
+}
+
+int
+test2 (intptr_t x, intptr_t n)
+{
+  if (x & ((intptr_t) 1 << ((int) n & (sizeof (intptr_t) * __CHAR_BIT__ - 1))))
+    foo ();
+
+  return 0;
+}
+
+int
+test3 (intptr_t x, intptr_t n)
+{
+  if (x & ((intptr_t) 1 << ((int) n & ((int) sizeof (intptr_t) * __CHAR_BIT__ - 1))))
+    foo ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "and\[lq\]\[ \t\]" } } */

Reply via email to