https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103222
Jan Hubicka <hubicka at gcc dot gnu.org> changed: What |Removed |Added ---------------------------------------------------------------------------- Status|UNCONFIRMED |NEW Ever confirmed|0 |1 CC| |aldyh at gcc dot gnu.org, | |hubicka at gcc dot gnu.org Last reconfirmed| |2021-11-13 --- Comment #1 from Jan Hubicka <hubicka at gcc dot gnu.org> --- So with -O1 we get: int main () { int16_t a_lsm.11; int _2; unsigned short a.5_4; unsigned short _11; short int _12; <bb 2> [local count: 1073741835]: a_lsm.11_16 = a; a.5_4 = (unsigned short) a_lsm.11_16; _11 = a.5_4 + 2; _12 = (short int) _11; a = _12; _2 = (int) _12; printf ("%d\n", _2); return 0; } while with -O2 we get: int main () { int _2; short int a.3_8; unsigned short a.5_9; unsigned short _10; short int _11; <bb 2> [local count: 1073741824]: a.3_8 = a; a.5_9 = (unsigned short) a.3_8; _10 = a.5_9 + 1; _11 = (short int) _10; a = _11; _2 = (int) _11; printf ("%d\n", _2); return 0; } Adding always_inline to func_2 makes -O1 and -O2 agree up to ccp2 110t.mergephi2, and then 111t.threadfull makes a difference: + FAIL: path through PHI in bb3 (incoming bb:2) crosses loop +path: 2->3->xx REJECTED +Checking profitability of path (backwards): bb:3 (3 insns) bb:5 (latch) + Control statement insns: 2 + Overall: 1 insns +Checking profitability of path (backwards): bb:3 (3 insns) bb:5 (latch) + Control statement insns: 2 + Overall: 1 insns + [1] Registering jump thread: (5, 3) incoming edge; (3, 4) nocopy; +path: 5->3->4 SUCCESS +Jump threading proved probability of edge 3->4 too small (it is 11.0% (guessed) should be always (guessed)) int main () { - uint32_t p3; uint32_t p2; + uint32_t p3; short int a.0_1; int _2; short int a.3_8; @@ -13,28 +72,15 @@ short int _11; <bb 2> [local count: 1073741824]: - - <bb 3> [local count: 9761289362]: - # p3_7 = PHI <1(2), 0(5)> - # p2_17 = PHI <1(2), p3_7(5)> - if (p2_17 != 0) - goto <bb 5>; [89.00%] - else - goto <bb 4>; [11.00%] - - <bb 4> [local count: 1073741824]: - a.0_1 = a; - _2 = 
(int) a.0_1; - printf ("%d\n", _2); - return 0; - - <bb 5> [local count: 8687547547]: a.3_8 = a; a.5_9 = (unsigned short) a.3_8; _10 = a.5_9 + 1; _11 = (short int) _10; a = _11; - goto <bb 3>; [100.00%] + a.0_1 = a; + _2 = (int) a.0_1; + printf ("%d\n", _2); + return 0; } and -fdisable-tree-threadfull1 fixes the difference.