/*
 * Ekin - fill e with 0.128 times a sum of three squared neighbour sums.
 *
 * For every (i, j, k) with 0 <= i < sz[0], 0 <= j < sz[1], 0 <= k < sz[2]:
 *
 *   e(i,j,k) = 0.128 * ( (vx(i,j,k) + vx(i+1,j,k))^2
 *                      + (vy(i,j,k) + vy(i,j+1,k))^2
 *                      + (vz(i,j,k) + vz(i,j,k+1))^2 )
 *
 * where a(i,j,k) denotes a[i + j * stride[1] + k * stride[2]], using the
 * stride array that is passed alongside that particular data array.
 *
 * Parameters:
 *   e,  stridee   - output array and its strides (written)
 *   vx, stridevx  - input array, read at i and i+1
 *   vy, stridevy  - input array, read at j and j+1
 *   vz, stridevz  - input array, read at k and k+1
 *   sz            - loop extents: sz[0], sz[1], sz[2] bound i, j, k
 *
 * Only elements [1] and [2] of each stride array are read; the i index is
 * always added with an implicit stride of 1.
 *
 * No bounds checking is done. Because of the +1 neighbour reads, vx is
 * accessed up to i == sz[0], vy up to j == sz[1] and vz up to k == sz[2],
 * so the caller must make those elements valid.
 *
 * NOTE(review): if this is meant to be 0.5 * |average velocity|^2, the
 * factor would be 0.125 rather than 0.128 - confirm the intended constant.
 */
void Ekin(double *e, int *stridee, double *vx, int *stridevx, double *vy, int *stridevy, double *vz, int *stridevz, int *sz) { int i1 = sz[0]; int j1 = sz[1]; int k1 = sz[2]; int i, j, k; for (k=0; k<k1; ++k) for (j=0; j<j1; ++j) for (i=0; i<i1; ++i) { e[i + j * stridee[1] + k * stridee[2]] = 0.128 * ( ((vx[i + j * stridevx[1] + k * stridevx[2]] + vx[i+1 + j * stridevx[1] + k * stridevx[2]]) * (vx[i + j * stridevx[1] + k * stridevx[2]] + vx[i+1 + j * stridevx[1] + k * stridevx[2]])) + ((vy[i + j * stridevy[1] + k * stridevy[2]] + vy[i + (j+1) * stridevy[1] + k * stridevy[2]]) * (vy[i + j * stridevy[1] + k * stridevy[2]] + vy[i + (j+1) * stridevy[1] + k * stridevy[2]])) + ((vz[i + j * stridevz[1] + k * stridevz[2]] + vz[i + j * stridevz[1] + (k+1) * stridevz[2]]) * (vz[i + j * stridevz[1] + k * stridevz[2]] + vz[i + j * stridevz[1] + (k+1) * stridevz[2]])));
} } The loop-invariant motion pass (lim) moves all of the j*stridev?[1] and k*stridev?[2] computations into the j-loop, but it does not move the k*stridev?[2] computations further out into the k-loop, where they would be evaluated only once per k iteration. This results in the following asm: Ekin: pushl %ebp movl %esp, %ebp pushl %edi pushl %esi pushl %ebx subl $44, %esp movl 40(%ebp), %eax movl $0, -24(%ebp) movl (%eax), %edx movl 4(%eax), %ecx movl 8(%eax), %eax movl %edx, -40(%ebp) movl %ecx, -36(%ebp) testl %eax, %eax movl %eax, -32(%ebp) jle .L13 .L4: movl -24(%ebp), %edx movl -36(%ebp), %eax incl %edx testl %eax, %eax movl %edx, -48(%ebp) jle .L7 movl -40(%ebp), %ecx movl $0, -16(%ebp) testl %ecx, %ecx jle .L17 .L10: movl 28(%ebp), %eax movl -24(%ebp), %ebx movl -16(%ebp), %edi movl $0, -56(%ebp) fldl .LC0 movl 8(%eax), %edx movl 4(%eax), %ecx incl %edi movl %edi, -28(%ebp) movl -16(%ebp), %edi imull %edx, %ebx movl 36(%ebp), %edx movl 4(%edx), %eax movl 8(%edx), %esi movl 12(%ebp), %edx imull %eax, %edi movl -16(%ebp), %eax movl %edi, -44(%ebp) movl 4(%edx), %edi movl -24(%ebp), %edx imull %edi, %eax movl 12(%ebp), %edi imull 8(%edi), %edx addl %edx, %eax movl -16(%ebp), %edx sall $3, %eax movl %eax, -20(%ebp) movl 20(%ebp), %eax movl 4(%eax), %edi movl -24(%ebp), %eax imull %edi, %edx movl 20(%ebp), %edi imull 8(%edi), %eax addl %eax, %edx movl -16(%ebp), %eax imull %ecx, %eax addl %ebx, %eax leal 0(,%eax,8), %edi movl -28(%ebp), %eax imull %eax, %ecx movl -24(%ebp), %eax addl %ecx, %ebx movl -44(%ebp), %ecx imull %esi, %eax sall $3, %ebx addl %ecx, %eax movl -48(%ebp), %ecx sall $3, %eax movl %eax, -52(%ebp) imull %ecx, %esi movl -44(%ebp), %ecx addl %esi, %ecx leal 0(,%ecx,8), %esi .p2align 4,,15 .L5: movl 16(%ebp), %eax movl 24(%ebp), %ecx incl -56(%ebp) fldl (%eax,%edx,8) faddl 8(%eax,%edx,8) incl %edx movl -52(%ebp), %eax addl $8, -52(%ebp) fldl (%edi,%ecx) addl $8, %edi faddl (%ecx,%ebx) addl $8, %ebx movl 32(%ebp), %ecx fldl (%eax,%ecx) faddl (%ecx,%esi) fxch %st(2) addl $8, %esi movl -20(%ebp), %eax movl 8(%ebp), %ecx fmul %st(0), %st fxch %st(1) fmul %st(0), %st faddp %st, %st(1) fxch 
%st(1) fmul %st(0), %st faddp %st, %st(1) fmul %st(1), %st fstpl (%eax,%ecx) addl $8, %eax movl %eax, -20(%ebp) movl -56(%ebp), %eax cmpl %eax, -40(%ebp) jne .L5 fstp %st(0) movl -28(%ebp), %edx cmpl %edx, -36(%ebp) jle .L7 .L18: movl -40(%ebp), %ecx movl %edx, -16(%ebp) testl %ecx, %ecx jg .L10 .L17: movl -16(%ebp), %ecx incl %ecx movl %ecx, -28(%ebp) movl -28(%ebp), %edx cmpl %edx, -36(%ebp) jg .L18 .L7: movl -48(%ebp), %eax cmpl %eax, -32(%ebp) movl %eax, -24(%ebp) jne .L4 .L13: addl $44, %esp popl %ebx popl %esi popl %edi popl %ebp ret -- Summary: loop-invariant-motion is not doing its work Product: gcc Version: 4.1.0 Status: UNCONFIRMED Keywords: missed-optimization Severity: normal Priority: P2 Component: tree-optimization AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: rguenth at gcc dot gnu dot org CC: gcc-bugs at gcc dot gnu dot org http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23970