https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97312

Aldy Hernandez <aldyh at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Last reconfirmed|                            |2020-10-07
     Ever confirmed|0                           |1
             Status|UNCONFIRMED                 |WAITING

--- Comment #1 from Aldy Hernandez <aldyh at gcc dot gnu.org> ---
Confirmed.

This test is checking the final assembly for a specific sequence.  I don't
speak aarch64 assembly, but the IL is different coming out of evrp.

The first culprit is this difference in the mergephi1 dump:

   _9 = .CTZ (x_6(D));
-  _10 = _9 & 31;
+  _10 = _9;

These are unsigned ints, so assuming they are 32 bits on aarch64, the result of
__builtin_ctz is always less than 32.  The only input that could yield 32 is 0,
and a CTZ of 0 is undefined according to the GCC manual:

Built-in Function: int __builtin_ctz (unsigned int x)

    Returns the number of trailing 0-bits in x, starting at the least
significant bit position. If x is 0, the result is undefined. 

So a bitwise AND of any value in the range [0, 31] with 0x1f (31) is a no-op.

Are aarch64 ints 32 bits?

Here are the full IL differences:

--- legacy-evrp/pr90838.c.038t.mergephi1        2020-10-07 08:44:12.152358885 -0400
+++ ranger/pr90838.c.038t.mergephi1     2020-10-07 08:39:12.339296502 -0400
@@ -1,41 +1,41 @@

 ;; Function ctz1 (ctz1, funcdef_no=0, decl_uid=3587, cgraph_uid=1, symbol_order=0)

 ctz1 (unsigned int x)
 {
   static const char table[32] =
"\x00\x01\x1c\x02\x1d\x0e\x18\x03\x1e\x16\x14\x0f\x19\x11\x04\b\x1f\x1b\r\x17\x15\x13\x10\x07\x1a\f\x12\x06\v\x05\n\t";
   unsigned int _1;
   unsigned int _2;
   unsigned int _3;
   unsigned int _4;
   char _5;
   int _9;
   int _10;

   <bb 2> :
   _1 = -x_6(D);
   _2 = _1 & x_6(D);
   _3 = _2 * 125613361;
   _4 = _3 >> 27;
   _9 = .CTZ (x_6(D));
-  _10 = _9 & 31;
+  _10 = _9;
   _5 = (char) _10;
   return _10;

 }



 ;; Function ctz2 (ctz2, funcdef_no=1, decl_uid=3591, cgraph_uid=2, symbol_order=1)

 ctz2 (unsigned int x)
 {
   static short int table[64] = {32, 0, 1, 12, 2, 6, 0, 13, 3, 0, 7, 0, 0, 0,
0, 14, 10, 4, 0, 0, 8, 0, 0, 25, 0, 0, 0, 0, 0, 21, 27, 15, 31, 11, 5, 0, 0, 0,
0, 0, 9, 0, 0, 
24, 0, 0, 20, 26, 30, 0, 0, 0, 0, 23, 0, 19, 29, 0, 22, 18, 28, 17, 16, 0};
   unsigned int _1;
   unsigned int _2;
   unsigned int _3;
   short int _4;
   int _8;

   <bb 2> :
   _1 = -x_5(D);
@@ -87,27 +87,27 @@


 ;; Function ctz4 (ctz4, funcdef_no=3, decl_uid=3601, cgraph_uid=4, symbol_order=5)

 ctz4 (long unsigned int x)
 {
   long unsigned int lsb;
   long unsigned int _1;
   long long unsigned int _2;
   long long unsigned int _3;
   char _4;
   int _9;
   int _10;

   <bb 2> :
   _1 = -x_5(D);
   lsb_6 = _1 & x_5(D);
   _2 = lsb_6 * 283881067100198605;
   _3 = _2 >> 58;
   _9 = .CTZ (x_5(D));
-  _10 = _9 & 63;
+  _10 = _9;
   _4 = (char) _10;
   return _10;

 }

The difference in the assembly matches the IL difference.  We have two fewer ANDs in the final output:

$ diff -u legacy.s ranger.s
--- legacy.s    2020-10-07 09:06:13.420446783 -0400
+++ ranger.s    2020-10-07 09:06:42.646646949 -0400
@@ -8,7 +8,6 @@
 ctz1:
        rbit    w0, w0
        clz     w0, w0
-       and     w0, w0, 31
        ret
        .size   ctz1, .-ctz1
        .align  2
@@ -36,7 +35,6 @@
 ctz4:
        rbit    x0, x0
        clz     x0, x0
-       and     w0, w0, 63
        ret
        .size   ctz4, .-ctz4

If my analysis is correct, someone aarch64 savvy should adjust this:

/* { dg-final { scan-assembler-times "and\t" 2 } } */

Reply via email to