The simple loop below doesn't run as fast as the 'hand-optimised' routine that
is also provided (using current 4.2 on an Opteron with -ffast-math -O2; it makes
a factor of 2 difference here). I've tried a number of further switches, but
didn't manage to find a case where the simple loop was as fast as the other.

! simple loop
! assume N is even
subroutine S31(a,b,c,N)
  ! Straightforward reference version: accumulates the sum of the
  ! element-wise products a(k)*b(k) for k = 1..N directly into c.
  !
  !   a, b : input vectors of length N
  !   c    : on return, the accumulated sum (0 when the loop does not run)
  !   N    : number of elements to process
  implicit none
  integer :: N
  real*8, dimension(N) :: a, b
  real*8 :: c
  integer :: k

  ! Single running accumulator; every iteration depends on the previous one.
  c = 0.0D0
  do k = 1, N
    c = c + a(k)*b(k)
  end do
end subroutine S31

! 'improved' loop
subroutine S32(a,b,c,N)
  ! Two-way unrolled variant of the product-sum loop: odd-indexed terms
  ! are accumulated in c and even-indexed terms in tmp, and the two
  ! partial sums are combined after the loop.
  !
  !   a, b : input vectors of length N
  !   c    : on return, the sum of a(i)*b(i) for i = 1..N
  !   N    : number of elements; may be even or odd
  !
  ! The paired loop stops at N-1 so that a(i+1)/b(i+1) never index past
  ! the end of the arrays. When N is odd the unpaired last element is
  ! added separately. For even N the sequence of operations is the same
  ! as a loop running i = 1, N, 2.
  implicit none
  integer, intent(in)  :: N
  real*8,  intent(in)  :: a(N), b(N)
  real*8,  intent(out) :: c
  real*8  :: tmp
  integer :: i

  c = 0.0D0
  tmp = 0.0D0
  do i = 1, N-1, 2
    c = c + a(i)*b(i)
    tmp = tmp + a(i+1)*b(i+1)
  end do
  c = c + tmp

  ! mod(N,2) is 1 only for positive odd N, so this cannot fire for N <= 0.
  if (mod(N,2) == 1) c = c + a(N)*b(N)
end subroutine S32


-- 
           Summary: Missed optimisation
           Product: gcc
           Version: 4.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: jv244 at cam dot ac dot uk


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25621


Reply via email to