Hi!

The following testcases are miscompiled (the first one since my improvements
to rotate discovery on GIMPLE, the other one for many years) because the
combiner optimizes nested ROTATEs with a narrowing SUBREG in between (i.e.
the outer rotate is performed in shorter precision than the inner one) into
just one ROTATE of the rotated constant.  While that (under certain
conditions) can work for shifts, it can't work for rotates: there the
folding is only valid when both rotates have the same precision.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

OT, on a related note I wonder why the code doesn't handle ROTATERT.
Earlier the code canonicalizes ROTATERT to ROTATE with an adjusted
(constant) count, so if the inner op is ROTATERT, why can't it
decanonicalize the outer one back to ROTATERT and treat it the same way?
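
For reference, that canonicalization is just the usual identity that a
right rotate by n in precision prec equals a left rotate by prec - n; a
quick self-contained sketch (helper names are hypothetical, assumes a
32-bit unsigned int):

static unsigned int
rotr32 (unsigned int x, unsigned int n)
{
  return (x >> (n & 31)) | (x << ((-n) & 31));
}

static unsigned int
rotl32 (unsigned int x, unsigned int n)
{
  return (x << (n & 31)) | (x >> ((-n) & 31));
}

int
main (void)
{
  unsigned int x = 0xdeadbeefu;
  for (unsigned int n = 1; n < 32; n++)
    if (rotr32 (x, n) != rotl32 (x, 32 - n))
      __builtin_abort ();
  return 0;
}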

2020-10-13  Jakub Jelinek  <ja...@redhat.com>

        PR rtl-optimization/97386
        * combine.c (simplify_shift_const_1): Don't optimize nested ROTATEs if
        they have different modes.

        * gcc.c-torture/execute/pr97386-1.c: New test.
        * gcc.c-torture/execute/pr97386-2.c: New test.

--- gcc/combine.c.jj    2020-08-27 18:42:35.000000000 +0200
+++ gcc/combine.c       2020-10-12 21:40:00.165359873 +0200
@@ -11003,8 +11003,11 @@ simplify_shift_const_1 (enum rtx_code co
                break;
              /* For ((int) (cstLL >> count)) >> cst2 just give up.  Queuing
                 up outer sign extension (often left and right shift) is
-                hardly more efficient than the original.  See PR70429.  */
-             if (code == ASHIFTRT && int_mode != int_result_mode)
+                hardly more efficient than the original.  See PR70429.
+                Similarly punt for rotates with different modes.
+                See PR97386.  */
+             if ((code == ASHIFTRT || code == ROTATE)
+                 && int_mode != int_result_mode)
                break;
 
              rtx count_rtx = gen_int_shift_amount (int_result_mode, count);
--- gcc/testsuite/gcc.c-torture/execute/pr97386-1.c.jj  2020-10-12 21:47:46.636649170 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr97386-1.c     2020-10-12 21:47:18.681049897 +0200
@@ -0,0 +1,16 @@
+/* PR rtl-optimization/97386 */
+
+__attribute__((noipa)) unsigned char
+foo (unsigned int c)
+{
+  return __builtin_bswap16 ((unsigned long long) (0xccccLLU << c | 0xccccLLU >> ((-c) & 63)));
+}
+
+int
+main ()
+{
+  unsigned char x = foo (0);
+  if (__CHAR_BIT__ == 8 && __SIZEOF_SHORT__ == 2 && x != 0xcc)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr97386-2.c.jj  2020-10-12 21:47:50.389595376 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr97386-2.c     2020-10-12 21:47:24.253970009 +0200
@@ -0,0 +1,20 @@
+/* PR rtl-optimization/97386 */
+
+__attribute__((noipa)) unsigned
+foo (int x)
+{
+  unsigned long long a = (0x800000000000ccccULL << x) | (0x800000000000ccccULL >> (64 - x));
+  unsigned int b = a;
+  return (b << 24) | (b >> 8);
+}
+
+int
+main ()
+{
+  if (__CHAR_BIT__ == 8
+      && __SIZEOF_INT__ == 4
+      && __SIZEOF_LONG_LONG__ == 8
+      && foo (1) != 0x99000199U)
+    __builtin_abort ();
+  return 0;
+}

        Jakub
