https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59345
! Joost VandeVondele <Joost.VandeVondele at mat dot ethz.ch> changed:
!
!            What    |Removed                     |Added
! ----------------------------------------------------------------------------
!    Last reconfirmed|2013-12-22 00:00:00         |2014-12-6
!                  CC|                            |Joost.VandeVondele at mat dot ethz
!                    |                            |.ch
!       Known to fail|                            |4.9.2, 5.0
!
! --- Comment #2 from Joost VandeVondele <Joost.VandeVondele at mat dot ethz.ch> ---
! still happens with trunk. In the microbenchmark below, seems like a 3-fold
! overhead due to packing. This is similar to using an assumed shape dummy arg as
! a temp, while in the latter case, this can be fixed with the contiguous
! attribute. Could the solution be as simple as somehow providing the
! 'contiguous' attribute to compiler generated temporaries ?
!
! > gfortran -Ofast -fno-inline t.f90
! > ./a.out
!  with packing: 1.8157229999999998 sec.
!  without packing: 0.49092599999999997 sec.
!  assumed shape, no contiguous : 1.9047100000000006 sec.
!  assumed shape, contiguous : 0.46692899999999948 sec.
!  total calls to foo: 400000000 expected 200000000
!
! > cat t.f90

! Module M: four callers (S1..S4) that each hand the negated 3-element array A
! to the dummy procedure foo in a different way, plus the callee foo itself.
! Declarations are intentionally left exactly as in the report, because the
! benchmark measures how the compiler treats each argument-passing variant.
MODULE M
  ! Running total of foo invocations; incremented only inside foo.
  INTEGER, SAVE :: count=0
CONTAINS

  ! Variant 1: the expression -A is passed directly as the actual argument.
  ! This is the loop reported as "with packing" by the driver below.
  SUBROUTINE S1(A,foo)
    REAL :: A(3)
    CALL foo(-A)
  END SUBROUTINE

  ! Variant 2: -A is first stored in a local explicit-shape array B, which is
  ! then passed to foo. Reported as "without packing".
  SUBROUTINE S2(A,foo)
    REAL :: A(3)
    REAL :: B(3)
    B=-A
    CALL foo(B)
  END SUBROUTINE

  ! Variant 3: the caller supplies B as an assumed-shape dummy (no CONTIGUOUS)
  ! that serves as the scratch array. Reported as "assumed shape, no contiguous".
  SUBROUTINE S3(A,B,foo)
    REAL :: A(3)
    REAL :: B(:)
    B=-A
    CALL foo(B)
  END SUBROUTINE

  ! Variant 4: same as S3, but B carries the CONTIGUOUS attribute.
  ! Reported as "assumed shape, contiguous".
  SUBROUTINE S4(A,B,foo)
    REAL :: A(3)
    REAL, CONTIGUOUS :: B(:)
    B=-A
    CALL foo(B)
  END SUBROUTINE

  ! Callee: takes an explicit-shape array of 3 reals and only bumps the
  ! module-level call counter; A itself is not read.
  SUBROUTINE foo(A)
    REAL :: A(3)
    count=count+1
  END SUBROUTINE

END MODULE

! Driver: runs each of S1..S4 N times, bracketing every loop with CPU_TIME,
! and prints the elapsed CPU time per variant followed by the call counter.
PROGRAM TEST
  USE M
  IMPLICIT NONE
  REAL :: A(3),B(3)
  INTEGER :: i
  REAL*8 :: t1,t2,t3,t4,t5,t6,t7,t8
  INTEGER :: N

  A=0
  N=100000000

  ! S1: expression actual argument.
  CALL CPU_TIME(t1)
  DO i=1,N
    CALL S1(A,foo)
  ENDDO
  CALL CPU_TIME(t2)

  ! S2: local explicit-shape temporary.
  CALL CPU_TIME(t3)
  DO i=1,N
    CALL S2(A,foo)
  ENDDO
  CALL CPU_TIME(t4)

  ! S3: assumed-shape scratch array without CONTIGUOUS.
  ! B is not initialised here; S3 assigns it before passing it on.
  CALL CPU_TIME(t5)
  DO i=1,N
    CALL S3(A,B,foo)
  ENDDO
  CALL CPU_TIME(t6)

  ! S4: assumed-shape scratch array with CONTIGUOUS.
  CALL CPU_TIME(t7)
  DO i=1,N
    CALL S4(A,B,foo)
  ENDDO
  CALL CPU_TIME(t8)

  WRITE(6,*) "with packing:", t2-t1, " sec."
  WRITE(6,*) "without packing:", t4-t3, "sec. "
  WRITE(6,*) "assumed shape, no contiguous :", t6-t5, "sec. "
  WRITE(6,*) "assumed shape, contiguous :", t8-t7, "sec. "
  ! NOTE(review): four loops of N calls run above, so count ends at 4*N while
  ! the "expected" value printed is 2*N; the output quoted in the report shows
  ! the same mismatch. Kept as posted so the program matches the quoted output.
  WRITE(6,*) "total calls to foo:", count, "expected", 2*N
END