https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57534
--- Comment #26 from amker at gcc dot gnu.org ---
Note the testcase:
/* Declared only (no definition in this testcase); called once by
   hand_benchmark_cache_ronly after its loops have finished. */
void timer_stop();
/* Volatile flag, initialised to 0.  hand_benchmark_cache_ronly reads it each
   time index reaches limit to decide whether to restart from index 0. */
volatile long keepgoing = 0;
/* Testcase: read-only sweep over x in strides of 8 doubles, written with
   explicit gotos instead of structured loops.

   x      - array that is read 8 elements at a time; x[0] and x[1] are
            overwritten with results at the end.
   limit  - the sweep continues while the advanced index is < limit.
   oloops, ous - not referenced anywhere in the body.

   The body runs before the first comparison against limit, so x[0..7] are
   read at least once whatever the value of limit.

   NOTE(review): the function is declared to return double but contains no
   return statement; results are only delivered through x[0] and x[1].
   The local `loops` is initialised but never used.  */
double hand_benchmark_cache_ronly( double *x, long limit, long *oloops, double
*ous) {
long index = 0, loops = 0;
double sum = (double)0;
double sum2 = (double)0;
/* `again` is the single entry point for both the inner sweep and the restart;
   the two accumulators each take four consecutive elements.  */
again: sum += x[index] + x[index+1] + x[index+2] + x[index+3];
sum2 += x[index+4] + x[index+5] + x[index+6] + x[index+7];
/* Back edge 1: advance by 8 and continue the sweep while below limit.  */
if ((index += 8) < limit) goto again;
/* Back edge 2: sweep finished; if the volatile flag is set, restart at 0.  */
else if (keepgoing) {
index = 0;
goto again;
}
timer_stop();
/* Publish the combined sum and the final index through the input array.  */
x[0] = (double)sum + (double)sum2;
x[1] = (double)index;
}
is equivalent to:
/* Function body only (the signature is that of the testcase above): the same
   computation expressed as two nested do-while loops instead of gotos.  */
{
long index = 0, loops = 0;
double sum = (double)0;
double sum2 = (double)0;
/* Outer loop: repeats the whole sweep while the volatile flag is non-zero.  */
do {
/* Resets the sweep; on the first pass this repeats the initialiser above.  */
index = 0;
/* Inner loop: one sweep over x in strides of 8, body executed before the
   comparison against limit.  */
do {
sum += x[index] + x[index+1] + x[index+2] + x[index+3];
sum2 += x[index+4] + x[index+5] + x[index+6] + x[index+7];
} while ((index += 8) < limit);
} while (keepgoing);
timer_stop();
x[0] = (double)sum + (double)sum2;
x[1] = (double)index;
}
Thus the inner loop can be analyzed well.
The interesting thing is that we need threading (or something similar) to
canonicalize the loop, even though threading is a transformation that has often
corrupted loop structure in the past.