https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107160
--- Comment #13 from Martin Liška <marxin at gcc dot gnu.org> ---
A slightly reduced test case that can be compiled with a cross compiler:

$ cat pr107160.c
#define N 16
float fl[N];

__attribute__ ((noipa, optimize (0)))
void init ()
{
  for (int i = 0; i < N; i++)
    fl[i] = 1 << i;
}

__attribute__ ((noipa))
float foo (int n)
{
  float sum0, sum1, sum2, sum3;
  sum0 = sum1 = sum2 = sum3 = 0.0f;
  for (int i = 0; i < n; i += 4)
    {
      sum0 += __builtin_fabs (fl[i]);
      sum1 += __builtin_fabs (fl[i + 1]);
      sum2 += __builtin_fabs (fl[i + 2]);
      sum3 += __builtin_fabs (fl[i + 3]);
    }
  return sum0 + sum1 + sum2 + sum3;
}

__attribute__ ((optimize (0)))
int main ()
{
  init ();
  float res = foo (N);
  __builtin_printf ("res:%f\n", res);
  return 0;
}

x86_64:
./a.out
res:65535.000000

ppc64le:
./a.out
res:15.000000

So the result on ppc64le is wrong: it sums only the first 4 elements (1 + 2 + 4 + 8 = 15) instead of all 16 (65535).