https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65468
--- Comment #1 from vries at gcc dot gnu.org ---
Using the patch submitted for gomp-4_0-branch at https://gcc.gnu.org/ml/gcc-patches/2014-07/msg01881.html, we get a simple loop:
...
bar._omp_fn.0 (struct .omp_data_s.0 & restrict .omp_data_i)
{
  int i;
  int a;
  int _3;
  int * _10;
  unsigned int pretmp_14;
  unsigned int _16;
  unsigned int _17;
  unsigned int _19;
  unsigned int prephitmp_22;

  <bb 2>:
  _3 = __builtin_omp_get_num_threads ();
  i_4 = __builtin_omp_get_thread_num ();
  if (i_4 <= 9)
    goto <bb 3>;
  else
    goto <bb 6>;

  <bb 3>:
  # a_5 = PHI <0(2)>
  # i_2 = PHI <i_4(2)>

  <bb 4>:
  # a_18 = PHI <a_5(3), a_7(4)>
  # i_21 = PHI <i_2(3), i_15(4)>
  a_7 = a_18 + i_21;
  _19 = (unsigned int) _3;
  _17 = (unsigned int) i_21;
  _16 = _17 + _19;
  i_15 = (int) _16;
  if (i_15 <= 9)
    goto <bb 4>;
  else
    goto <bb 5>;

  <bb 5>:
  pretmp_14 = (unsigned int) a_7;

  <bb 6>:
  # prephitmp_22 = PHI <pretmp_14(5), 0(2)>
  _10 = &.omp_data_i_9(D)->a;
  __atomic_fetch_add_4 (_10, prephitmp_22, 0); [tail call]
  return;

}
...