Hello world,

the attached patch implements the blocked algorithm for
constant shift for dim > 1 for eoshift0 (which handles
the case of constant shift and constant fill value).

Speedup, as for cshift, is large.  Moving a 500*500*500
array by -3 with eo_bench.f90 (also attached):

$ gfortran -O3 eo_bench.f90 && ./a.out
 dim =            1  t =   0.451796889
 dim =            2  t =   0.183514118
 dim =            3  t =   0.184015989
$ gfortran-7 -static-libgfortran -O3 eo_bench.f90 && ./a.out
 dim =            1  t =   0.955736041
 dim =            2  t =    1.42228103
 dim =            3  t =    3.00043702

Regression-tested.  OK for trunk?

Regards

        Thomas

2017-07-01  Thomas Koenig  <tkoe...@gcc.gnu.org>

        * intrinsics/eoshift0.c:  For contiguous arrays, use
        block algorithm.  Use memcpy where possible.

2017-07-01  Thomas Koenig  <tkoe...@gcc.gnu.org>

        * gfortran/eoshift_3.f90:  New test.
! { dg-do  run }
! Check that eoshift works for three-dimensional arrays.
module x
  implicit none
contains
  subroutine eoshift_0 (array, shift, boundary, dim, res)
    real, dimension(:,:,:), intent(in) :: array
    real, dimension(:,:,:), intent(out) :: res
    integer, value :: shift
    real, optional, intent(in) :: boundary
    integer, optional, intent(in) :: dim
    integer :: s1, s2, s3
    integer :: n1, n2, n3

    real :: b
    integer :: d
    if (present(boundary)) then
       b = boundary
    else
       b = 0.0
    end if

    if (present(dim)) then
       d = dim
    else
       d = 1
    end if

    n1 = size(array,1)
    n2 = size(array,2)
    n3 = size(array,3)

    select case(dim)
    case(1)
       if (shift > 0) then
          shift = min(shift, n1)
          do s3=1,n3
             do s2=1,n2
                do s1= 1, n1 - shift
                   res(s1,s2,s3) = array(s1+shift,s2,s3)
                end do
                do s1 = n1 - shift + 1,n1
                   res(s1,s2,s3) = b
                end do
             end do
          end do

       else
          shift = max(shift, -n1)
          do s3=1,n3
             do s2=1,n2
                do s1=1,-shift
                   res(s1,s2,s3) = b
                end do
                do s1= 1-shift,n1
                   res(s1,s2,s3) = array(s1+shift,s2,s3)
                end do
             end do
          end do
       end if

    case(2)
       if (shift > 0) then
          shift = min(shift, n2)
          do s3=1,n3
             do s2=1, n2 - shift
                do s1=1,n1
                   res(s1,s2,s3) = array(s1,s2+shift,s3)
                end do
             end do
             do s2=n2 - shift + 1, n2
                do s1=1,n1
                   res(s1,s2,s3) = b
                end do
             end do
          end do
       else
          shift = max(shift, -n2)
          do s3=1,n3
             do s2=1,-shift
                do s1=1,n1
                   res(s1,s2,s3) = b
                end do
             end do
             do s2=1-shift,n2
                do s1=1,n1
                   res(s1,s2,s3) = array(s1,s2+shift,s3)
                end do
             end do
          end do
       end if

    case(3)
       if (shift > 0) then
          shift = min(shift, n3)
          do s3=1,n3 - shift
             do s2=1, n2
                do s1=1,n1
                   res(s1,s2,s3) = array(s1,s2,s3+shift)
                end do
             end do
          end do
          do s3=n3 - shift + 1, n3
             do s2=1, n2
                do s1=1,n1
                   res(s1,s2,s3) = b
                end do
             end do
          end do
       else
          shift = max(shift, -n3)
          do s3=1,-shift
             do s2=1,n2
                do s1=1,n1
                   res(s1,s2,s3) = b
                end do
             end do
          end do
          do s3=1-shift,n3
             do s2=1,n2
                do s1=1,n1
                   res(s1,s2,s3) = array(s1,s2,s3+shift)
                end do
             end do
          end do
       end if

    case default
       stop "Illegal dim"
    end select
  end subroutine eoshift_0
end module x

program main
  use x
  implicit none
  integer, parameter :: n1=2,n2=4,n3=2
  real, dimension(n1,n2,n3) :: a,b,c
  integer :: dim, shift, shift_lim
  call random_number(a)

  do dim=1,3
     if (dim == 1) then
        shift_lim = n1 + 1
     else if (dim == 2) then
        shift_lim = n2 + 1
     else
        shift_lim = n3 + 1
     end if
     do shift=-shift_lim, shift_lim
        b = eoshift(a,shift,dim=dim)
        call eoshift_0 (a, shift=shift, dim=dim, res=c)
        if (any (b /= c)) then
                print *,"dim = ", dim, "shift = ", shift
                call abort
        end if
     end do
  end do
  call random_number(b)
  c = b

  do dim=1,3
     if (dim == 1) then
        shift_lim = n1/2 + 1
     else if (dim == 2) then
        shift_lim = n2/2 + 1
     else
        shift_lim = n3/2 + 1
     end if
     
     do shift=-shift_lim, shift_lim
        b(1:n1:2,:,:) = eoshift(a(1:n1/2,:,:),shift,dim=dim)
        call eoshift_0 (a(1:n1/2,:,:), shift=shift, dim=dim, res=c(1:n1:2,:,:))
        if (any (b /= c)) call abort
     end do
  end do

end program main
Index: intrinsics/eoshift0.c
===================================================================
--- intrinsics/eoshift0.c	(Revision 249632)
+++ intrinsics/eoshift0.c	(Arbeitskopie)
@@ -53,7 +53,8 @@
   index_type len;
   index_type n;
   index_type arraysize;
-
+  bool do_blocked;
+  
   /* The compiler cannot figure out that these are set, initialize
      them to avoid warnings.  */
   len = 0;
@@ -102,39 +103,94 @@
   count[0] = 0;
   sstride[0] = -1;
   rstride[0] = -1;
+
+  if (which > 0)
+    {
+      /* Test if both ret and array are contiguous.  */
+      size_t r_ex, a_ex;
+      r_ex = 1;
+      a_ex = 1;
+      do_blocked = true;
+      dim = GFC_DESCRIPTOR_RANK (array);
+      for (n = 0; n < dim; n ++)
+	{
+	  index_type rs, as;
+	  rs = GFC_DESCRIPTOR_STRIDE (ret, n);
+	  if (rs != r_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  as = GFC_DESCRIPTOR_STRIDE (array, n);
+	  if (as != a_ex)
+	    {
+	      do_blocked = false;
+	      break;
+	    }
+	  r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n);
+	  a_ex *= GFC_DESCRIPTOR_EXTENT (array, n);
+	}
+    }
+  else
+    do_blocked = false;
+
   n = 0;
-  for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+
+  if (do_blocked)
     {
-      if (dim == which)
-        {
-          roffset = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim);
-          if (roffset == 0)
-            roffset = size;
-          soffset = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim);
-          if (soffset == 0)
-            soffset = size;
-          len = GFC_DESCRIPTOR_EXTENT(array,dim);
-        }
-      else
-        {
-          count[n] = 0;
-          extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
-          rstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim);
-          sstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim);
-          n++;
-        }
+      /* For contiguous arrays, use the relationship that
+
+         dimension(n1,n2,n3) :: a, b
+	 b = eoshift(a,sh,3)
+
+         can be dealt with as if
+
+	 dimension(n1*n2*n3) :: an, bn
+	 bn = eoshift(a,sh*n1*n2,1)
+
+	 so a block move can be used for dim>1.  */
+      len = GFC_DESCRIPTOR_STRIDE(array, which)
+	* GFC_DESCRIPTOR_EXTENT(array, which);
+      shift *= GFC_DESCRIPTOR_STRIDE(array, which);
+      roffset = size;
+      soffset = size;
+      for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  count[n] = 0;
+	  extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	  rstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim);
+	  sstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim);
+	  n++;
+	}
+      count[n] = 0;
+      dim = GFC_DESCRIPTOR_RANK (array) - which;
     }
-  if (sstride[0] == 0)
-    sstride[0] = size;
-  if (rstride[0] == 0)
-    rstride[0] = size;
+  else
+    {
+      for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++)
+	{
+	  if (dim == which)
+	    {
+	      roffset = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim);
+	      if (roffset == 0)
+		roffset = size;
+	      soffset = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim);
+	      if (soffset == 0)
+		soffset = size;
+	      len = GFC_DESCRIPTOR_EXTENT(array,dim);
+	    }
+	  else
+	    {
+	      count[n] = 0;
+	      extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim);
+	      rstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim);
+	      sstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim);
+	      n++;
+	    }
+	}
+      dim = GFC_DESCRIPTOR_RANK (array);
+    }
 
-  dim = GFC_DESCRIPTOR_RANK (array);
-  rstride0 = rstride[0];
-  sstride0 = sstride[0];
-  rptr = ret->base_addr;
-  sptr = array->base_addr;
-
   if ((shift >= 0 ? shift : -shift) > len)
     {
       shift = len;
@@ -148,6 +204,11 @@
 	len = len + shift;
     }
 
+  rstride0 = rstride[0];
+  sstride0 = sstride[0];
+  rptr = ret->base_addr;
+  sptr = array->base_addr;
+
   while (rptr)
     {
       /* Do the shift for this dimension.  */
@@ -161,12 +222,23 @@
           src = sptr;
           dest = &rptr[-shift * roffset];
         }
-      for (n = 0; n < len; n++)
-        {
-          memcpy (dest, src, size);
-          dest += roffset;
-          src += soffset;
-        }
+      /* If the elements are contiguous, perform a single block move.  */
+
+      if (soffset == size && roffset == size)
+	{
+	  size_t chunk = size * len;
+	  memcpy (dest, src, chunk);
+	  dest += chunk;
+	}
+      else
+	{
+	  for (n = 0; n < len; n++)
+	    {
+	      memcpy (dest, src, size);
+	      dest += roffset;
+	      src += soffset;
+	    }
+	}
       if (shift >= 0)
         {
           n = shift;
program main
  implicit none
  integer, parameter :: n=500
  real, dimension(n,n,n) :: a, c
  real :: t1, t2
  integer :: dim
  
  call random_number(a)
  do dim=1,3
     call cpu_time(t1)
     c = eoshift(a, -3, dim=dim)
     call cpu_time(t2)
     print *,"dim = ", dim, " t = ", t2-t1
  end do
end program main

Reply via email to