Am 08.07.2017 um 13:57 schrieb Thomas Koenig:
Am 04.07.2017 um 00:06 schrieb Thomas Koenig:

Attached are a few more speedups for special eoshift cases.  This
time, nothing fancy: just use memcpy for copying in the
contiguous case.

Ping?

Regards

     Thomas

Some benchmarks (source attached).


$ gfortran eo_bench_2.f90 && ./a.out
 dim =            1  t =   0.747093916
 dim =            2  t =    2.09117603
 dim =            3  t =    3.07099581
$ gfortran-7 -static-libgfortran eo_bench_2.f90 && ./a.out
 dim =            1  t =    1.24332905
 dim =            2  t =    2.09103727
 dim =            3  t =    3.05382776
$ gfortran eo_bench_3.f90 && ./a.out
 dim =            1  t =   0.734890938
 dim =            2  t =    2.40442204
 dim =            3  t =    3.12888288
$ gfortran-7 -static-libgfortran eo_bench_3.f90 && ./a.out
 dim =            1  t =    1.30460107
 dim =            2  t =    2.17445374
 dim =            3  t =    2.78331423
$ gfortran eo_bench_4.f90 && ./a.out
 dim =            1  t =   0.777376175
 dim =            2  t =    2.40524292
 dim =            3  t =    3.10695219
$ gfortran-7 -static-libgfortran eo_bench_4.f90 && ./a.out
 dim =            1  t =    1.39399910
 dim =            2  t =    2.16738701
 dim =            3  t =    3.09568548

So, we get a speedup of roughly 66% to 79% for a common use case (dim=1).
! Benchmark: time the EOSHIFT intrinsic with a scalar shift and an
! array-valued boundary, once along each dimension of a rank-3 array.
program main
  implicit none
  integer, parameter :: n = 600
  integer, parameter :: shift_amount = -3
  real :: a(n,n,n), c(n,n,n)
  real :: b(n,n)
  real :: t_start, t_stop
  integer :: dim

  ! Boundary values are all zero; the source array holds random data.
  b = 0.
  call random_number(a)

  time_each_dim: do dim = 1, 3
     ! Only the EOSHIFT assignment sits between the two CPU-time samples.
     call cpu_time(t_start)
     c = eoshift(a, shift_amount, dim=dim, boundary=b)
     call cpu_time(t_stop)
     print *,"dim = ", dim, " t = ", t_stop - t_start
  end do time_each_dim
end program main
! Benchmark: time the EOSHIFT intrinsic with an array-valued shift and
! the default boundary, once along each dimension of a rank-3 array.
program main
  implicit none
  integer, parameter :: n=600
  real, dimension(n,n,n) :: a, c
  real :: t1, t2
  integer :: dim
  integer, dimension(n,n) :: sh
  real, dimension(n,n) :: sh_real

  ! Build a table of small positive integer shifts (about 5 to 14) from
  ! uniform random numbers, then fill the source array with random data.
  call random_number(sh_real)
  sh = int(sh_real * 10 + 5)
  call random_number(a)
  do dim=1,3
     ! Only the EOSHIFT assignment sits between the two CPU-time samples.
     call cpu_time(t1)
     c = eoshift(a, shift=sh, dim=dim)
     call cpu_time(t2)
     print *,"dim = ", dim, " t = ", t2-t1
  end do
end program main
! Benchmark: time the EOSHIFT intrinsic with an array-valued shift and
! an array-valued boundary, once along each dimension of a rank-3 array.
program main
  implicit none
  integer, parameter :: n=600
  real, dimension(n,n,n) :: a, c
  real, dimension(n,n) :: b
  real :: t1, t2
  integer :: dim
  integer, dimension(n,n) :: sh
  real, dimension(n,n) :: sh_real

  ! Build a table of small positive integer shifts (about 5 to 14) from
  ! uniform random numbers, then fill the source array with random data.
  call random_number(sh_real)
  sh = int(sh_real * 10 + 5)
  call random_number(a)
  ! The boundary is all zeros; it is set once, directly, with no
  ! throwaway random fill beforehand.
  b = 0.
  do dim=1,3
     ! Only the EOSHIFT assignment sits between the two CPU-time samples.
     call cpu_time(t1)
     c = eoshift(a, shift=sh, dim=dim, boundary=b)
     call cpu_time(t2)
     print *,"dim = ", dim, " t = ", t2-t1
  end do
end program main

Reply via email to