Am 08.07.2017 um 13:57 schrieb Thomas Koenig:
Am 04.07.2017 um 00:06 schrieb Thomas Koenig:
Attached are a few more speedups for special eoshift cases. This
time it is nothing fancy: they just use memcpy for copying in the
contiguous case.
Ping?
Regards
Thomas
Some benchmarks (source attached).
$ gfortran eo_bench_2.f90 && ./a.out
dim = 1 t = 0.747093916
dim = 2 t = 2.09117603
dim = 3 t = 3.07099581
$ gfortran-7 -static-libgfortran eo_bench_2.f90 && ./a.out
dim = 1 t = 1.24332905
dim = 2 t = 2.09103727
dim = 3 t = 3.05382776
$ gfortran eo_bench_3.f90 && ./a.out
dim = 1 t = 0.734890938
dim = 2 t = 2.40442204
dim = 3 t = 3.12888288
$ gfortran-7 -static-libgfortran eo_bench_3.f90 && ./a.out
dim = 1 t = 1.30460107
dim = 2 t = 2.17445374
dim = 3 t = 2.78331423
$ gfortran eo_bench_4.f90 && ./a.out
dim = 1 t = 0.777376175
dim = 2 t = 2.40524292
dim = 3 t = 3.10695219
$ gfortran-7 -static-libgfortran eo_bench_4.f90 && ./a.out
dim = 1 t = 1.39399910
dim = 2 t = 2.16738701
dim = 3 t = 3.09568548
So, we get a speedup of about 66% to 79% for a common use case (dim=1).
! Benchmark: time EOSHIFT with a scalar shift and an array-valued boundary,
! once along each of the three dimensions of a rank-3 array.
program main
  implicit none

  integer, parameter :: n = 600
  real :: a(n,n,n)            ! source data, filled with random numbers
  real :: c(n,n,n)            ! receives the shifted result
  real :: b(n,n)              ! boundary values, all zero
  real :: t_start, t_stop     ! CPU time before and after the shift
  integer :: axis             ! dimension currently being shifted along

  b = 0.
  call random_number(a)

  do axis = 1, 3
    call cpu_time(t_start)
    c = eoshift(a, shift=-3, boundary=b, dim=axis)
    call cpu_time(t_stop)
    print *,"dim = ", axis, " t = ", t_stop-t_start
  end do
end program main
! Benchmark: time EOSHIFT with an array-valued shift and no explicit
! boundary (so the intrinsic's default boundary is used), once along each
! of the three dimensions of a rank-3 array.
program main
  implicit none
  integer, parameter :: n=600
  real, dimension(n,n,n) :: a, c      ! source data and shifted result
  real :: t1, t2                      ! CPU time before and after the shift
  integer :: dim
  integer, dimension(n,n) :: sh       ! per-section shift amounts
  real, dimension(n,n) :: sh_real     ! random numbers used to build sh

  ! Build pseudo-random integer shifts from random numbers in [0,1):
  ! scale by 10, offset by 5, truncate.
  call random_number(sh_real)
  sh = int(sh_real * 10 + 5)
  call random_number(a)

  do dim=1,3
    call cpu_time(t1)
    c = eoshift(a, shift=sh, dim=dim)
    call cpu_time(t2)
    print *,"dim = ", dim, " t = ", t2-t1
  end do
end program main
! Benchmark: time EOSHIFT with an array-valued shift and an array-valued
! boundary, once along each of the three dimensions of a rank-3 array.
program main
  implicit none
  integer, parameter :: n=600
  real, dimension(n,n,n) :: a, c      ! source data and shifted result
  real, dimension(n,n) :: b           ! boundary values, all zero
  real :: t1, t2                      ! CPU time before and after the shift
  integer :: dim
  integer, dimension(n,n) :: sh       ! per-section shift amounts
  real, dimension(n,n) :: sh_real     ! random numbers used to build sh

  ! Build pseudo-random integer shifts from random numbers in [0,1):
  ! scale by 10, offset by 5, truncate.
  call random_number(sh_real)
  sh = int(sh_real * 10 + 5)
  call random_number(a)
  ! The boundary is passed as an array, but its contents are simply zero.
  b = 0.

  do dim=1,3
    call cpu_time(t1)
    c = eoshift(a, shift=sh, dim=dim, boundary=b)
    call cpu_time(t2)
    print *,"dim = ", dim, " t = ", t2-t1
  end do
end program main