https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84106

--- Comment #6 from Daniel Fruzynski <bugzi...@poradnik-webmastera.com> ---
When you revisit your cost model for loops, please also take a look at this
code. test2 has one assignment moved into a separate loop nest, and it is about
twice as fast as the test1 function (with gcc 4.8.5).

[code]
#include <stdint.h>
#include <string.h>

/* Side length of the square work arrays used by test1() and test2(). */
#define N 9

/* File-scope arrays with no initializer, so they start out zero-filled. */
int a1[N][N];           /* input: read by both functions, never written */
int a2[N][N];           /* receives an element-wise copy of a1 */
int a3[N][N];           /* receives 1u << a1[i][j] for every element */
uint16_t a4[N][N-1];    /* receives rows 1..N-1 of a3, transposed and
                           converted to uint16_t; one column fewer than a3
                           because row 0 of a3 is not stored */

/*
 * Fused variant: one loop nest fills a2, a3 and a4 in a single pass.
 *
 * For every (i, j) it copies a1 into a2, stores 1u << a1[i][j] into a3 and,
 * for all rows except the first, writes the freshly computed a3 element into
 * a4 at the transposed position [j][i-1].
 *
 * Per the bug report, this version runs about half as fast as test2() when
 * built with gcc 4.8.5; the loop structure is the subject of the report, so
 * it should be kept exactly as written.
 */
void test1()
{
    for (int i = 0; i < N; ++i)
    {
        for (int j = 0; j < N; ++j)
        {
            a2[i][j] = a1[i][j];
            /* Shift count comes straight from a1; it must stay below the
               bit width of unsigned int for the shift to be defined. */
            a3[i][j] = 1u << a1[i][j];
            /* a4 has only N-1 columns, indexed by i-1, so row 0 of a3 has
               no destination and is skipped. The store is strided: the
               inner index j selects the row of a4. */
            if (i > 0)
              a4[j][i-1] = a3[i][j];
       }
    }
}

/*
 * Split variant: performs the same stores as test1(), but the a4 assignment
 * is moved out into its own loop nest.
 *
 * The first nest fills a2 and a3 from a1; the second nest then copies rows
 * 1..N-1 of a3 into a4 at the transposed position [j][i-1].
 *
 * Per the bug report, this version is about twice as fast as test1() when
 * built with gcc 4.8.5; the loop structure is the subject of the report, so
 * it should be kept exactly as written.
 */
void test2()
{
    /* Pass 1: element-wise copy and shift, with no conditional in the body. */
    for (int i = 0; i < N; ++i)
    {
        for (int j = 0; j < N; ++j)
        {
            a2[i][j] = a1[i][j];
            /* Shift count comes straight from a1; it must stay below the
               bit width of unsigned int for the shift to be defined. */
            a3[i][j] = 1u << a1[i][j];
        }
    }
    /* Pass 2: starting at i = 1 replaces the "if (i > 0)" guard of test1(),
       since a4 has no column for row 0 of a3. */
    for (int i = 1; i < N; ++i)
    {
        for (int j = 0; j < N; ++j)
        {
            a4[j][i-1] = a3[i][j];
        }
    }
}
[/code]

Reply via email to