------- Comment #2 from rguenth at gcc dot gnu dot org 2009-07-04 12:33 -------
One loop is
139 0.0046 : DO l = 1 , K
622 0.0208 : IF ( B(l,j)/=ZERO ) THEN
: temp = Alpha*B(l,j)
21380 0.7146 : DO i = 1 , M
569348 19.0299 : C(i,j) = C(i,j) + temp*A(i,l)
: ENDDO
: ENDIF
: ENDDO
where the accesses to C(i,j) and A(i,l) are both unaligned. Because the number
of iterations is symbolic, we use epilogue peeling. Instead, we could have
peeled iterations to align the loads from and stores to C.
! Matrix-matrix update  C := Beta*C + Alpha*A*B  restricted to rows 1..M and
! columns 1..N of C, with inner (summation) dimension K.  No transpose
! options are handled here.
!
! Arguments:
!   M, N, K   (in)    loop extents: rows of C, columns of C, summation length.
!   Alpha     (in)    scalar multiplying the product A*B.
!   A         (in)    assumed-size array with leading dimension Lda; elements
!                     A(1:M,1:K) are read.
!   Lda       (in)    leading dimension of A.
!   B         (in)    assumed-size array with leading dimension Ldb; elements
!                     B(1:K,1:N) are read.
!   Ldb       (in)    leading dimension of B.
!   Beta      (in)    scalar multiplying the incoming contents of C.
!   C         (inout) assumed-size array with leading dimension Ldc; elements
!                     C(1:M,1:N) are updated in place.
!   Ldc       (in)    leading dimension of C.
!
! The arrays are assumed-size, so no bounds information is available inside
! the routine; the caller is responsible for M <= Lda, K <= Ldb and M <= Ldc
! (NOTE(review): not checked anywhere in this routine).
SUBROUTINE DGEMM(M,N,K,Alpha,A,Lda,B,Ldb,Beta,C,Ldc)
IMPLICIT NONE
! Constants used for the exact-value fast-path tests below.
DOUBLE PRECISION , PARAMETER :: ONE = 1.0D+0 , ZERO = 0.0D+0
DOUBLE PRECISION :: Alpha , Beta
INTEGER :: K , Lda , Ldb , Ldc , M , N
DOUBLE PRECISION , DIMENSION(Lda,*) :: A
DOUBLE PRECISION , DIMENSION(Ldb,*) :: B
DOUBLE PRECISION , DIMENSION(Ldc,*) :: C
INTENT (IN) A , Alpha , B , Beta , K , Lda , Ldb , Ldc , M , N
INTENT (INOUT) C
! i = row index, j = column index of C, l = summation index.
INTEGER :: i , j , l
! Holds Alpha*B(l,j), which is invariant over the innermost loop.
DOUBLE PRECISION :: temp
! Process C one column at a time.
DO j = 1 , N
! Step 1: apply Beta to column j of C.
IF ( Beta==ZERO ) THEN
! Beta is exactly zero: overwrite with zero instead of multiplying, so the
! previous contents of C are never read on this path.
DO i = 1 , M
C(i,j) = ZERO
ENDDO
ELSEIF ( Beta/=ONE ) THEN
! General Beta: scale the existing column.  When Beta is exactly one,
! neither branch runs and the column is left as it is.
DO i = 1 , M
C(i,j) = Beta*C(i,j)
ENDDO
ENDIF
! Step 2: accumulate Alpha*A(:,l)*B(l,j) into column j for every l.
DO l = 1 , K
! Skip the whole column update when the B entry is exactly zero.
IF ( B(l,j)/=ZERO ) THEN
temp = Alpha*B(l,j)
! Innermost loop runs over the first (fastest-varying) index of both C
! and A, so consecutive iterations touch consecutive memory locations.
DO i = 1 , M
C(i,j) = C(i,j) + temp*A(i,l)
ENDDO
ENDIF
ENDDO
ENDDO
END SUBROUTINE DGEMM
--
rguenth at gcc dot gnu dot org changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |NEW
Ever Confirmed|0 |1
Last reconfirmed|0000-00-00 00:00:00 |2009-07-04 12:33:20
date| |
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40648