https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107160

--- Comment #13 from Martin Liška <marxin at gcc dot gnu.org> ---
A slightly reduced test case that can be compiled with a cross compiler:

$ cat pr107160.c
/* Number of elements in fl.  */
#define N 16
/* Input data: written by init, read by foo.  */
float fl[N];

/* Fill fl so that element K holds the value 1 << K (converted to float).
   Built with optimize (0) and noipa, as requested by the attributes.  */
__attribute__ ((noipa, optimize (0))) void
init ()
{
  int idx = 0;

  while (idx < N)
    {
      fl[idx] = 1 << idx;
      idx++;
    }
}

/* Return the sum of the absolute values of fl[0] .. fl[n - 1].

   The loop advances four elements per iteration and keeps one partial
   sum per lane (sum0..sum3); the four partial sums are added together
   at the end.  Every iteration reads fl[i] through fl[i + 3], so N must
   be a multiple of 4 and must not exceed the array size for all reads
   to stay in bounds (main passes N).

   NOTE(review): this is the function under test in the reproducer;
   keep its shape exactly as is, since the reported wrong result
   depends on how the compiler transforms this loop.  */
__attribute__ ((noipa)) float
foo (int n)
{
  float sum0, sum1, sum2, sum3;
  sum0 = sum1 = sum2 = sum3 = 0.0f;

  for (int i = 0; i < n; i += 4)
    {
      /* __builtin_fabs is the double variant, so each float element is
	 widened to double before the absolute value is taken.  */
      sum0 += __builtin_fabs (fl[i]);
      sum1 += __builtin_fabs (fl[i + 1]);
      sum2 += __builtin_fabs (fl[i + 2]);
      sum3 += __builtin_fabs (fl[i + 3]);
    }

  return sum0 + sum1 + sum2 + sum3;
}

/* Driver: populate fl via init, sum all N elements with foo and print
   the total.  With fl[k] == 1 << k for k in 0..15 the expected output
   is "res:65535.000000".  */
__attribute__ ((optimize (0))) int
main ()
{
  float res;

  init ();
  res = foo (N);
  __builtin_printf ("res:%f\n", res);
  return 0;
}

x86_64:
./a.out 
res:65535.000000

ppc64le:
./a.out
res:15.000000

So the result on ppc64le is wrong: it sums only the first 4 elements (and not all 16).

Reply via email to