Consider the attached Fortran code (the most expensive routine, computation-wise, in our weather forecasting model).

verint.s.7.3 is the result of:

gfortran -g -O3 -S -march=native -mtune=native verint.f

using release 7.3.

verint.s.8.1 is the result of:

gfortran -g -O3 -S -march=native -mtune=native verint.f

using the recently released GCC 8.1.

$ wc -l verint.s.7.3 verint.s.8.1
  7818 verint.s.7.3
  6087 verint.s.8.1

$ grep vfma verint.s.7.3 | wc -l
381
$ grep vfma verint.s.8.1 | wc -l
254

but:

$ grep vfma verint.s.7.3 | grep -v ss |  wc -l
127
$ grep vfma verint.s.8.1 | grep -v ss |  wc -l
127

and:

$ grep movaps verint.s.7.3 | wc -l
306
$ grep movaps verint.s.8.1 | wc -l
270

Finally:

$ grep zmm verint.s.7.3 | wc -l
1494
$ grep zmm verint.s.8.1 | wc -l
0
$ grep ymm verint.s.7.3 | wc -l
379
$ grep ymm verint.s.8.1 | wc -l
1464

I haven't had the opportunity to test this for speed (it is quite complicated, as I have to build several support libraries with 8.1, like openmpi, netcdf, hdf{4|5}, fftw ...)

--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands
At home: http://moene.org/~toon/; weather: http://moene.org/~hirlam/
Progress of GNU Fortran: http://gcc.gnu.org/wiki/GFortran#news
# 1 "/scratch/hirlam/hl_home/MPI/lib/src/grdy/verint.F"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "/scratch/hirlam/hl_home/MPI/lib/src/grdy/verint.F"
c Library:grdy $RCSfile$, $Revision: 7536 $
c checked in by $Author: ovignes $ at $Date: 2009-12-18 14:23:36 +0100 (Fri, 18 Dec 2009) $
c $State$, $Locker$
c $Log$
c Revision 1.3  1999/04/22 09:30:45  DagBjoerge
c MPP code
c
c Revision 1.2  1999/03/09 10:23:13  GerardCats
c Add SGI parallelisation directives DOACROSS
c
c Revision 1.1  1996/09/06 13:12:18  GCats
c Created from grdy.apl, 1 version 2.6.1, by Gerard Cats
c
      SUBROUTINE VERINT (
     I   KLON   , KLAT   , KLEV   , KINT  , KHALO
     I , KLON1  , KLON2  , KLAT1  , KLAT2
     I , KP     , KQ     , KR
     R , PARG   , PRES
     R , PALFH  , PBETH
     R , PALFA  , PBETA  , PGAMA   )
C
C*******************************************************************
C
C  VERINT - THREE DIMENSIONAL INTERPOLATION
C
C  PURPOSE:
C
C  FOR EVERY POINT (JX,JY) WITH KLON1<=JX<=KLON2 AND
C  KLAT1<=JY<=KLAT2, PRES(JX,JY) IS SET TO A WEIGHTED SUM OF THE
C  VALUES OF PARG ON A STENCIL PLACED AT
C  IDX=KP(JX,JY), IDY=KQ(JX,JY), ILEV=KR(JX,JY):
C
C    KINT = 1   IDX-1..IDX   , IDY-1..IDY   , ILEV-1..ILEV
C    KINT = 2   IDX-1..IDX+1 , IDY-1..IDY+1 , ILEV-1..ILEV+1
C    KINT = 3   IDX-2..IDX+1 , IDY-2..IDY+1 , ILEV-2..ILEV+1
C    KINT = 4   LEVELS ILEV-2 AND ILEV+1:
C                 IDX-1..IDX AND IDY-1..IDY, WEIGHTED WITH
C                 PALFH, 1-PALFH IN X AND PBETH, 1-PBETH IN Y.
C               LEVELS ILEV-1 AND ILEV:
C                 ROWS IDY-1 AND IDY USE IDX-2..IDX+1 WITH
C                 PALFA(.,.,1:4); ROWS IDY-2 AND IDY+1 USE
C                 IDX-1..IDX WITH PALFH, 1-PALFH; THE FOUR ROWS
C                 ARE COMBINED WITH PBETA(.,.,1:4).
C               THE FOUR LEVELS ARE COMBINED WITH PGAMA(.,.,1:4).
C
C  THE LAST INDEX OF PALFA, PBETA AND PGAMA NUMBERS THE STENCIL
C  POINTS IN INCREASING X, Y AND LEVEL ORDER RESPECTIVELY.
C
C  ANY OTHER VALUE OF KINT LEAVES PRES UNTOUCHED.
C  NO BOUNDS CHECKING IS DONE HERE: THE CALLER MUST MAKE SURE
C  THAT THE WHOLE STENCIL STAYS INSIDE THE DECLARED BOUNDS OF PARG.
C
C  INPUT PARAMETERS:
C
C  KLON      NUMBER OF GRIDPOINTS IN X-DIRECTION
C  KLAT      NUMBER OF GRIDPOINTS IN Y-DIRECTION
C  KLEV      NUMBER OF VERTICAL LEVELS
C  KINT      TYPE OF INTERPOLATION
C            = 1 - LINEAR
C            = 2 - QUADRATIC
C            = 3 - CUBIC
C            = 4 - MIXED CUBIC/LINEAR
C  KHALO     FIXES THE HORIZONTAL BOUNDS OF PARG, WHICH RUN FROM
C            2-KHALO TO KLON+KHALO-1 IN X AND FROM 2-KHALO TO
C            KLAT+KHALO-1 IN Y
C  KLON1     FIRST GRIDPOINT IN X-DIRECTION
C  KLON2     LAST  GRIDPOINT IN X-DIRECTION
C  KLAT1     FIRST GRIDPOINT IN Y-DIRECTION
C  KLAT2     LAST  GRIDPOINT IN Y-DIRECTION
C  KP        ARRAY OF INDEXES FOR HORIZONTAL DISPLACEMENTS (X)
C  KQ        ARRAY OF INDEXES FOR HORIZONTAL DISPLACEMENTS (Y)
C  KR        ARRAY OF INDEXES FOR VERTICAL   DISPLACEMENTS
C  PARG      ARRAY OF ARGUMENTS (THE FIELD THAT IS INTERPOLATED)
C  PALFH     ALFA HAT (ONLY READ WHEN KINT = 4)
C  PBETH     BETA HAT (ONLY READ WHEN KINT = 4)
C  PALFA     ARRAY OF WEIGHTS IN X-DIRECTION
C  PBETA     ARRAY OF WEIGHTS IN Y-DIRECTION
C  PGAMA     ARRAY OF WEIGHTS IN VERTICAL DIRECTION
C
C  OUTPUT PARAMETERS:
C
C  PRES      INTERPOLATED FIELD.  ONLY THE WINDOW
C            (KLON1:KLON2,KLAT1:KLAT2) IS WRITTEN; ALL OTHER
C            ELEMENTS KEEP THEIR VALUE ON ENTRY, WHICH IS WHY THE
C            ARGUMENT IS DECLARED INTENT(INOUT) AND NOT INTENT(OUT).
C
C  HISTORY:
C
C  J.E. HAUGEN       1      1992
C
C*******************************************************************
C
      IMPLICIT NONE
C
      INTEGER, INTENT(IN) :: KLON, KLAT, KLEV, KINT, KHALO
      INTEGER, INTENT(IN) :: KLON1, KLON2, KLAT1, KLAT2
C
      INTEGER, INTENT(IN) :: KP(KLON,KLAT), KQ(KLON,KLAT)
      INTEGER, INTENT(IN) :: KR(KLON,KLAT)
      REAL, INTENT(IN) ::
     R   PARG(2-KHALO:KLON+KHALO-1,2-KHALO:KLAT+KHALO-1,KLEV)
      REAL, INTENT(INOUT) :: PRES(KLON,KLAT)
      REAL, INTENT(IN) :: PALFH(KLON,KLAT), PBETH(KLON,KLAT)
      REAL, INTENT(IN) :: PALFA(KLON,KLAT,4), PBETA(KLON,KLAT,4)
      REAL, INTENT(IN) :: PGAMA(KLON,KLAT,4)
C
C  JX, JY          LOOP INDICES OVER THE OUTPUT WINDOW
C  IDX, IDY, ILEV  STENCIL POSITION FOR THE CURRENT POINT
C  Z1MAH, Z1MBH    1-PALFH AND 1-PBETH FOR THE CURRENT POINT
C
      INTEGER JX, JY, IDX, IDY, ILEV
      REAL Z1MAH, Z1MBH
C
C  THE INTERPOLATION FORMULAS BELOW ARE WRITTEN OUT IN FULL ON
C  PURPOSE; THEIR PARENTHESES FIX THE ORDER OF THE SUMMATION.
C
      SELECT CASE (KINT)
C
      CASE (1)
C  LINEAR INTERPOLATION
C
      DO JY = KLAT1,KLAT2
      DO JX = KLON1,KLON2
         IDX  = KP(JX,JY)
         IDY  = KQ(JX,JY)
         ILEV = KR(JX,JY)
C
         PRES(JX,JY) = PGAMA(JX,JY,1)*(
C
     +   PBETA(JX,JY,1)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY-1,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY-1,ILEV-1) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY  ,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY  ,ILEV-1) ) )
C    +
     +               + PGAMA(JX,JY,2)*(
C    +
     +   PBETA(JX,JY,1)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY-1,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY-1,ILEV  ) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY  ,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY  ,ILEV  ) ) )
      ENDDO
      ENDDO
C
      CASE (2)
C  QUADRATIC INTERPOLATION
C
      DO JY = KLAT1,KLAT2
      DO JX = KLON1,KLON2
         IDX  = KP(JX,JY)
         IDY  = KQ(JX,JY)
         ILEV = KR(JX,JY)
C
         PRES(JX,JY) = PGAMA(JX,JY,1)*(
C
     +   PBETA(JX,JY,1)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY-1,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY-1,ILEV-1)
     +                  + PALFA(JX,JY,3)*PARG(IDX+1,IDY-1,ILEV-1) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY  ,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY  ,ILEV-1)
     +                  + PALFA(JX,JY,3)*PARG(IDX+1,IDY  ,ILEV-1) )
     + + PBETA(JX,JY,3)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY+1,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY+1,ILEV-1)
     +                  + PALFA(JX,JY,3)*PARG(IDX+1,IDY+1,ILEV-1) ) )
C    +
     +               + PGAMA(JX,JY,2)*(
C    +
     +   PBETA(JX,JY,1)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY-1,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY-1,ILEV  )
     +                  + PALFA(JX,JY,3)*PARG(IDX+1,IDY-1,ILEV  ) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY  ,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY  ,ILEV  )
     +                  + PALFA(JX,JY,3)*PARG(IDX+1,IDY  ,ILEV  ) )
     + + PBETA(JX,JY,3)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY+1,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY+1,ILEV  )
     +                  + PALFA(JX,JY,3)*PARG(IDX+1,IDY+1,ILEV  ) ) )
C    +
     +               + PGAMA(JX,JY,3)*(
C    +
     +   PBETA(JX,JY,1)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY-1,ILEV+1)
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY-1,ILEV+1)
     +                  + PALFA(JX,JY,3)*PARG(IDX+1,IDY-1,ILEV+1) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY  ,ILEV+1)
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY  ,ILEV+1)
     +                  + PALFA(JX,JY,3)*PARG(IDX+1,IDY  ,ILEV+1) )
     + + PBETA(JX,JY,3)*( PALFA(JX,JY,1)*PARG(IDX-1,IDY+1,ILEV+1)
     +                  + PALFA(JX,JY,2)*PARG(IDX  ,IDY+1,ILEV+1)
     +                  + PALFA(JX,JY,3)*PARG(IDX+1,IDY+1,ILEV+1) ) )
      ENDDO
      ENDDO
C
      CASE (3)
C  CUBIC INTERPOLATION
C
      DO JY = KLAT1,KLAT2
      DO JX = KLON1,KLON2
         IDX  = KP(JX,JY)
         IDY  = KQ(JX,JY)
         ILEV = KR(JX,JY)
C
         PRES(JX,JY) = PGAMA(JX,JY,1)*(
C
     +   PBETA(JX,JY,1)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY-2,ILEV-2)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY-2,ILEV-2)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY-2,ILEV-2)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY-2,ILEV-2) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY-1,ILEV-2)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY-1,ILEV-2)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY-1,ILEV-2)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY-1,ILEV-2) )
     + + PBETA(JX,JY,3)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY  ,ILEV-2)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY  ,ILEV-2)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY  ,ILEV-2)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY  ,ILEV-2) )
     + + PBETA(JX,JY,4)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY+1,ILEV-2)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY+1,ILEV-2)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY+1,ILEV-2)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY+1,ILEV-2) ) )
C    +
     +               + PGAMA(JX,JY,2)*(
C    +
     +   PBETA(JX,JY,1)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY-2,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY-2,ILEV-1)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY-2,ILEV-1)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY-2,ILEV-1) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY-1,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY-1,ILEV-1)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY-1,ILEV-1)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY-1,ILEV-1) )
     + + PBETA(JX,JY,3)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY  ,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY  ,ILEV-1)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY  ,ILEV-1)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY  ,ILEV-1) )
     + + PBETA(JX,JY,4)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY+1,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY+1,ILEV-1)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY+1,ILEV-1)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY+1,ILEV-1) ) )
C    +
     +               + PGAMA(JX,JY,3)*(
C    +
     +   PBETA(JX,JY,1)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY-2,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY-2,ILEV  )
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY-2,ILEV  )
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY-2,ILEV  ) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY-1,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY-1,ILEV  )
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY-1,ILEV  )
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY-1,ILEV  ) )
     + + PBETA(JX,JY,3)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY  ,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY  ,ILEV  )
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY  ,ILEV  )
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY  ,ILEV  ) )
     + + PBETA(JX,JY,4)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY+1,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY+1,ILEV  )
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY+1,ILEV  )
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY+1,ILEV  ) ) )
C    +
     +               + PGAMA(JX,JY,4)*(
C    +
     +   PBETA(JX,JY,1)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY-2,ILEV+1)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY-2,ILEV+1)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY-2,ILEV+1)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY-2,ILEV+1) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY-1,ILEV+1)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY-1,ILEV+1)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY-1,ILEV+1)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY-1,ILEV+1) )
     + + PBETA(JX,JY,3)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY  ,ILEV+1)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY  ,ILEV+1)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY  ,ILEV+1)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY  ,ILEV+1) )
     + + PBETA(JX,JY,4)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY+1,ILEV+1)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY+1,ILEV+1)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY+1,ILEV+1)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY+1,ILEV+1) ) )
      ENDDO
      ENDDO
C
      CASE (4)
C  MIXED CUBIC/LINEAR INTERPOLATION
C
      DO JY = KLAT1,KLAT2
      DO JX = KLON1,KLON2
         IDX  = KP(JX,JY)
         IDY  = KQ(JX,JY)
         ILEV = KR(JX,JY)
C
C  COMPLEMENTARY WEIGHTS OF THE TWO-POINT (LINEAR) PARTS
         Z1MAH = 1.0 - PALFH(JX,JY)
         Z1MBH = 1.0 - PBETH(JX,JY)
C
C  OUTER LEVELS ILEV-2 AND ILEV+1: TWO POINTS IN X AND IN Y
         PRES(JX,JY) = PGAMA(JX,JY,1)*(
C
     +   PBETH(JX,JY)  *( PALFH(JX,JY)  *PARG(IDX-1,IDY-1,ILEV-2)
     +                  + Z1MAH         *PARG(IDX  ,IDY-1,ILEV-2) )
     + + Z1MBH         *( PALFH(JX,JY)  *PARG(IDX-1,IDY  ,ILEV-2)
     +                  + Z1MAH         *PARG(IDX  ,IDY  ,ILEV-2) ) )
C    +
     +               + PGAMA(JX,JY,4)*(
C    +
     +   PBETH(JX,JY)  *( PALFH(JX,JY)  *PARG(IDX-1,IDY-1,ILEV+1)
     +                  + Z1MAH         *PARG(IDX  ,IDY-1,ILEV+1) )
     + + Z1MBH         *( PALFH(JX,JY)  *PARG(IDX-1,IDY  ,ILEV+1)
     +                  + Z1MAH         *PARG(IDX  ,IDY  ,ILEV+1) ) )
C    +
C  INNER LEVELS ILEV-1 AND ILEV: FOUR ROWS IN Y, OF WHICH THE
C  OUTER TWO USE TWO POINTS IN X AND THE INNER TWO USE FOUR
     +               + PGAMA(JX,JY,2)*(
C    +
     +   PBETA(JX,JY,1)*( PALFH(JX,JY)  *PARG(IDX-1,IDY-2,ILEV-1)
     +                  + Z1MAH         *PARG(IDX  ,IDY-2,ILEV-1) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY-1,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY-1,ILEV-1)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY-1,ILEV-1)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY-1,ILEV-1) )
     + + PBETA(JX,JY,3)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY  ,ILEV-1)
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY  ,ILEV-1)
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY  ,ILEV-1)
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY  ,ILEV-1) )
     + + PBETA(JX,JY,4)*( PALFH(JX,JY)  *PARG(IDX-1,IDY+1,ILEV-1)
     +                  + Z1MAH         *PARG(IDX  ,IDY+1,ILEV-1) ) )
C    +
     +               + PGAMA(JX,JY,3)*(
C    +
     +   PBETA(JX,JY,1)*( PALFH(JX,JY)  *PARG(IDX-1,IDY-2,ILEV  )
     +                  + Z1MAH         *PARG(IDX  ,IDY-2,ILEV  ) )
     + + PBETA(JX,JY,2)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY-1,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY-1,ILEV  )
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY-1,ILEV  )
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY-1,ILEV  ) )
     + + PBETA(JX,JY,3)*( PALFA(JX,JY,1)*PARG(IDX-2,IDY  ,ILEV  )
     +                  + PALFA(JX,JY,2)*PARG(IDX-1,IDY  ,ILEV  )
     +                  + PALFA(JX,JY,3)*PARG(IDX  ,IDY  ,ILEV  )
     +                  + PALFA(JX,JY,4)*PARG(IDX+1,IDY  ,ILEV  ) )
     + + PBETA(JX,JY,4)*( PALFH(JX,JY)  *PARG(IDX-1,IDY+1,ILEV  )
     +                  + Z1MAH         *PARG(IDX  ,IDY+1,ILEV  ) ) )
      ENDDO
      ENDDO
C
      CASE DEFAULT
C  UNKNOWN KINT: NOTHING IS COMPUTED, PRES KEEPS ITS VALUE ON ENTRY
      CONTINUE
C
      END SELECT
C
      END

Reply via email to