https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57534

--- Comment #26 from amker at gcc dot gnu.org ---
Note the testcase:

/* Defined elsewhere; the testcase calls it once after the summing loop exits. */
void timer_stop();

/* Volatile flag tested each time a pass over x completes; a non-zero value
 * restarts the pass from index 0. */
volatile long keepgoing = 0;
/* Testcase: accumulates the doubles in x eight per iteration using a loop
 * built from goto, and restarts from index 0 whenever a pass completes while
 * the volatile flag keepgoing is non-zero.  Afterwards it calls timer_stop()
 * and stores the combined sum in x[0] and the final index in x[1].
 * NOTE(review): declared to return double but contains no return statement;
 * loops, oloops and ous are never used in the body. */
double hand_benchmark_cache_ronly( double *x, long limit, long *oloops, double
*ous) {
        long index = 0, loops = 0;
        double sum = (double)0;
        double sum2 = (double)0;
        /* Loop head: elements index..index+3 go to sum, index+4..index+7 to sum2. */
        again:   sum += x[index] + x[index+1] + x[index+2] + x[index+3];
        sum2 += x[index+4] + x[index+5] + x[index+6] + x[index+7];
        /* Inner back edge: advance by 8 and continue while below limit. */
        if ((index += 8) < limit)     goto again;
        /* Outer back edge: pass finished; start over if keepgoing is set. */
        else if (keepgoing)     {
                index = 0;
                goto again;
        }
        timer_stop();
        x[0] = (double)sum + (double)sum2;
        x[1] = (double)index;
}

is equivalent to:

{
        long index = 0, loops = 0;
        double sum = (double)0;
        double sum2 = (double)0;
        /* Outer loop: repeat the whole pass while keepgoing is non-zero. */
        do {
          index = 0;
          /* Inner loop: eight elements per iteration until index reaches limit. */
          do {
            sum += x[index] + x[index+1] + x[index+2] + x[index+3];
            sum2 += x[index+4] + x[index+5] + x[index+6] + x[index+7];
          } while ((index += 8) < limit);
        } while (keepgoing);

        timer_stop();
        x[0] = (double)sum + (double)sum2;
        x[1] = (double)index;
}

Thus the inner loop can be well analyzed.

Interestingly, we need threading (or something similar) to canonicalize the
loop, a transformation that has often corrupted loop structure in the past.

Reply via email to