https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99414

Jan Hubicka <hubicka at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|s235 and s233 benchmarks of |s235, s2233 and s233
                   |TSVC is vectorized better   |benchmarks of TSVC is
                   |by icc than gcc (loop       |vectorized better by icc
                   |interchange)                |than gcc (loop interchange)

--- Comment #3 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
This one takes 7s with gcc and 0.4s with icc.

// Element type shared by every array in this test case.
typedef float real_t;

// Base repetition count; main() scales it to 100*(iterations/LEN_2D).
#define iterations 100000
// Not referenced anywhere in this excerpt.
#define LEN_1D 32000
// Extent of each dimension of the square 2-D arrays below.
#define LEN_2D 256
// array definitions
// File-scope LEN_2D x LEN_2D arrays (static storage, so they start zeroed).
// tt is not referenced in this excerpt.

real_t
aa[LEN_2D][LEN_2D],bb[LEN_2D][LEN_2D],cc[LEN_2D][LEN_2D],tt[LEN_2D][LEN_2D];

// Test-case kernel: a shared outer i loop containing two inner j loops that
// walk their arrays in opposite subscript orders.
//
// func_args is never read in the body.
// NOTE(review): struct args_t and dummy() are not declared in this excerpt;
// presumably they come from the harness this was cut from -- confirm.
//
// Returns aa[0][0] converted to int. The loops shown never write that
// element, because both i and j start at 1.
int main(struct args_t * func_args)
{
//    loop interchange
//    interchanging with one of two inner loops

    // Repetition loop: 100*(100000/256) = 39000 passes (integer division).
    for (int nl = 0; nl < 100*(iterations/LEN_2D); nl++) {
        for (int i = 1; i < LEN_2D; i++) {
            // j is the FIRST subscript here, so successive iterations are
            // LEN_2D elements apart, and each one reads the aa[j-1][i] value
            // stored by the previous j iteration.
            for (int j = 1; j < LEN_2D; j++) {
                aa[j][i] = aa[j-1][i] + cc[j][i];
            }
            // j is the LAST subscript here, so successive iterations touch
            // adjacent elements; the value read, bb[i-1][j], belongs to the
            // previous i row rather than to an earlier j iteration.
            for (int j = 1; j < LEN_2D; j++) {
                bb[i][j] = bb[i-1][j] + cc[i][j];
            }
        }
        // NOTE(review): opaque call once per repetition; presumably it keeps
        // the compiler from collapsing the repetition loop -- confirm.
        dummy();
    }

   return aa[0][0];
}

Reply via email to