If you don't need all that FUN flexibility, you can get this done way faster with the aperm and colMeans functions:

tst <- matrix( seq.int( 1440 * 360 )
             , ncol = 1440
             , nrow = 360
             )
tst.small <- matrix( seq.int( 8 * 4 )
                   , ncol = 8
                   , nrow = 4
                   )
aggregate.nx.ny.expand.grid <- function( dta, nx = 2, ny = 2, FUN = mean, ... )
{
  # FUN and ... are accepted for a consistent signature, but the block
  # means are always computed with colMeans()
  ilon <- seq( 1, ncol( dta ), nx )
  ilat <- seq( 1, nrow( dta ), ny )
  cells <- as.matrix( expand.grid( ilat, ilon ) )
  # extract each ny-by-nx block as one column of 'blocks'
  blocks <- apply( cells
                 , 1
                 , function( x ) dta[ x[ 1 ]:( x[ 1 ] + ny - 1 )
                                    , x[ 2 ]:( x[ 2 ] + nx - 1 ) ]
                 )
  block.means <- colMeans( blocks )
  matrix( block.means
        , nrow( dta ) / ny
        , ncol( dta ) / nx
        )
}
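
A quick sanity check on the small test matrix (illustrative; the same 2 x 4 result appears further down the thread):

aggregate.nx.ny.expand.grid( tst.small )
#      [,1] [,2] [,3] [,4]
# [1,]  3.5 11.5 19.5 27.5
# [2,]  5.5 13.5 21.5 29.5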

aggregate.nx.ny.array.apply <- function( dta, nx = 2, ny = 2, FUN = mean, ... ) 
{
  a <- array( dta
            , dim = c( ny
                     , nrow( dta ) %/% ny
                     , nx
                     , ncol( dta ) %/% nx
                     )
            )
  apply( a, c( 2, 4 ), FUN, ... )
}

aggregate.nx.ny.array.aperm.mean <- function( dta, nx = 2, ny = 2, ... ) {
  # number of rows in result
  nnr <- nrow( dta ) %/% ny
  # number of columns in result
  nnc <- ncol( dta ) %/% nx
  # number of values to take mean of
  nxny <- nx * ny
  # describe existing layout of values in dta as 4-d array
  a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) )
  # swap data in dimensions 2 and 3
  a2 <- aperm( a1, c( 1, 3, 2, 4 ) )
  # treat first two dimensions as column vectors, remaining as columns
  a3 <- matrix( a2, nrow = nxny )
  # fast calculation of column means
  v <- colMeans( a3, ... )
  # reframe result vector as a matrix
  matrix( v, ncol = nnc )
}
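
To see why this is fast, here is an illustrative walk-through on the small test matrix with nx = ny = 2: after the reshape and aperm(), each column of a3 holds exactly one 2 x 2 block, so a single vectorised colMeans() call replaces the per-block FUN calls.

a1 <- array( tst.small, dim = c( 2, 2, 2, 4 ) )  # ny, nnr, nx, nnc
a2 <- aperm( a1, c( 1, 3, 2, 4 ) )               # bring the two within-block dims together
a3 <- matrix( a2, nrow = 4 )                     # one 2 x 2 block per column
a3[ , 1 ]
# [1] 1 2 5 6    <- the top-left block of tst.small
colMeans( a3 )
# [1]  3.5  5.5 11.5 13.5 19.5 21.5 27.5 29.5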

aggregate.nx.ny.array.aperm.apply <- function( dta, nx = 2, ny = 2, FUN = mean, ... ) {
  # number of rows in result
  nnr <- nrow( dta ) %/% ny
  # number of columns in result
  nnc <- ncol( dta ) %/% nx
  # number of values to apply FUN to
  nxny <- nx * ny
  # describe existing layout of values in dta as 4-d array
  a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) )
  # swap data in dimensions 2 and 3
  a2 <- aperm( a1, c( 1, 3, 2, 4 ) )
  # treat first two dimensions as column vectors, remaining as columns
  a3 <- matrix( a2, nrow = nxny )
  # apply FUN to column vectors
  v <- apply( a3, 2, FUN = FUN, ... )
  matrix( v, ncol = nnc )
}
test1 <- aggregate.nx.ny.expand.grid( tst )
test2 <- aggregate.nx.ny.array.apply( tst )
test3 <- aggregate.nx.ny.array.aperm.mean( tst )
test4 <- aggregate.nx.ny.array.aperm.apply( tst )
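
All four functions should return the same 180 x 720 matrix; a quick, illustrative consistency check:

stopifnot( isTRUE( all.equal( test1, test2 ) )
         , isTRUE( all.equal( test1, test3 ) )
         , isTRUE( all.equal( test1, test4 ) )
         )
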
library(microbenchmark)
microbenchmark(
  aggregate.nx.ny.expand.grid( tst, 2, 2, mean, na.rm = TRUE )
, aggregate.nx.ny.array.apply( tst, 2, 2, mean, na.rm = TRUE )
, aggregate.nx.ny.array.aperm.mean( tst, 2, 2, na.rm = TRUE )
, aggregate.nx.ny.array.aperm.apply( tst, 2, 2, mean, na.rm = TRUE )
)
# Unit: milliseconds
#                                                               expr        min         lq       mean     median        uq      max neval cld
#         aggregate.nx.ny.expand.grid(tst, 2, 2, mean, na.rm = TRUE) 628.528322 675.470967  916.39630  778.54090  873.9754 2452.695   100  b
#         aggregate.nx.ny.array.apply(tst, 2, 2, mean, na.rm = TRUE) 846.883314 920.831966 1126.94691 1000.33830 1094.9233 3412.639   100   c
#          aggregate.nx.ny.array.aperm.mean(tst, 2, 2, na.rm = TRUE)   8.904369   9.191747   21.98528   10.30099   15.9169  158.687   100 a
#   aggregate.nx.ny.array.aperm.apply(tst, 2, 2, mean, na.rm = TRUE) 619.691851 733.246331  936.73359  757.58383  844.2016 2824.557   100  b


On Sat, 30 Jul 2016, Jeff Newmiller wrote:

For the record, the array.apply code can be fixed as below, but then it is slower than the expand.grid version.

aggregate.nx.ny.array.apply <- function(dta,nx=2,ny=2, FUN=mean,...)
{
  a <- array(dta, dim = c(ny, nrow( dta ) %/% ny, nx, ncol( dta ) %/% nx))
  apply( a, c(2, 4), FUN, ... )
}

--
Sent from my phone. Please excuse my brevity.

On July 30, 2016 11:06:16 AM PDT, "Anthoni, Peter (IMK)" <peter.anth...@kit.edu> wrote:
Hi all,

thanks for the suggestions, I did some timing tests, see below.
Unfortunately aggregate.nx.ny.array.apply does not produce the expected result.
So the fastest seems to be aggregate.nx.ny.expand.grid, though the double for loop is not that much slower.

many thanks
Peter

tst=matrix(1:(1440*360),ncol=1440,nrow=360)
system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.forloop(tst,2,2,mean,na.rm=T)})
   user  system elapsed
 11.227   0.073  11.371
system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.interaction(tst,2,2,mean,na.rm=T)})
   user  system elapsed
 26.354   0.475  26.880
system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.expand.grid(tst,2,2,mean,na.rm=T)})
   user  system elapsed
  9.683   0.055   9.763
system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.array.apply(tst,2,2,mean,na.rm=T)})
   user  system elapsed
  7.693   0.055   7.800

tst.small=matrix(1:(8*4),ncol=8,nrow=4)
aggregate.nx.ny.forloop = function(data,nx=2,ny=2, FUN=mean,...)
{
  nlon=nrow(data)
  nlat=ncol(data)
  newdata=matrix(NA,nrow=nlon/nx,ncol=nlat/ny)
  dim(newdata)
  for(ilon in seq(1,nlon,nx)) {
    for(ilat in seq(1,nlat,ny)) {
      ilon_new=1+(ilon-1)/nx
      ilat_new=1+(ilat-1)/ny
      newdata[ilon_new,ilat_new] = FUN(data[ilon+0:1,ilat+0:1],...)
    }
  }
  newdata
}
aggregate.nx.ny.forloop(tst.small)
    [,1] [,2] [,3] [,4]
[1,]  3.5 11.5 19.5 27.5
[2,]  5.5 13.5 21.5 29.5

aggregate.nx.ny.interaction = function(data,nx=2,ny=2, FUN=mean,...)
{
  nlon=nrow(data)
  nlat=ncol(data)
  newdata=matrix(NA,nrow=nlon/nx,ncol=nlat/ny)
  newdata[] <- tapply( data, interaction( (row(data)+1) %/% 2, (col(data)+1) %/% 2 ), FUN, ...)
  newdata
}
aggregate.nx.ny.interaction(tst.small)
    [,1] [,2] [,3] [,4]
[1,]  3.5 11.5 19.5 27.5
[2,]  5.5 13.5 21.5 29.5

aggregate.nx.ny.expand.grid = function(data,nx=2,ny=2, FUN=mean,...)
{
  ilon <- seq(1,ncol(data),nx)
  ilat <- seq(1,nrow(data),ny)
  cells <- as.matrix(expand.grid(ilat, ilon))
  blocks <- apply(cells, 1, function(x) data[x[1]:(x[1]+1),x[2]:(x[2]+1)])
  block.means <- colMeans(blocks)
  matrix(block.means, nrow(data)/ny, ncol(data)/nx)
}
aggregate.nx.ny.expand.grid(tst.small)
    [,1] [,2] [,3] [,4]
[1,]  3.5 11.5 19.5 27.5
[2,]  5.5 13.5 21.5 29.5

aggregate.nx.ny.array.apply = function(data,nx=2,ny=2, FUN=mean,...)
{
  # note: this 3-d reshape omits the nx dimension, hence the wrong result below (see the 4-d fix above)
  a <- array(data, dim = c(ny, nrow( data ) %/% ny, ncol( data ) %/% nx))
  apply( a, c(2, 3), FUN, ... )
}
aggregate.nx.ny.array.apply(tst.small)
    [,1] [,2] [,3] [,4]
[1,]  1.5  5.5  9.5 13.5
[2,]  3.5  7.5 11.5 15.5



On 28 Jul 2016, at 00:26, David Winsemius <dwinsem...@comcast.net> wrote:


On Jul 27, 2016, at 12:02 PM, Jeff Newmiller <jdnew...@dcn.davis.ca.us> wrote:

An alternative (more compact, not necessarily faster, because apply is still a for loop inside):

f <- function( m, nx, ny ) {
  # redefine the dimensions of m as a 3-d array
  a <- array( m
            , dim = c( ny
                     , nrow( m ) %/% ny
                     , ncol( m ) %/% nx )
            )
  # take the mean over dim 1 for each (dim 2, dim 3) slice
  apply( a, c( 2, 3 ), FUN = mean )
}
f( tst, nx, ny )

Here's an apparently loopless strategy, although I suspect the code for interaction (and maybe tapply as well?) uses a loop.


tst_2x2 <- matrix(NA, ncol=4, nrow=2)

tst_2x2[] <- tapply( tst, interaction( (row(tst)+1) %/% 2, (col(tst)+1) %/% 2 ), mean)

tst_2x2

    [,1] [,2] [,3] [,4]
[1,]  3.5 11.5 19.5 27.5
[2,]  5.5 13.5 21.5 29.5

--
David.



--
Sent from my phone. Please excuse my brevity.

On July 27, 2016 9:08:32 AM PDT, David L Carlson <dcarl...@tamu.edu> wrote:
This should be faster. It uses apply() across the blocks.

ilon <- seq(1,8,nx)
ilat <- seq(1,4,ny)
cells <- as.matrix(expand.grid(ilat, ilon))
blocks <- apply(cells, 1, function(x) tst[x[1]:(x[1]+1), x[2]:(x[2]+1)])
block.means <- colMeans(blocks)
tst_2x2 <- matrix(block.means, 2, 4)
tst_2x2
  [,1] [,2] [,3] [,4]
[1,]  3.5 11.5 19.5 27.5
[2,]  5.5 13.5 21.5 29.5

-------------------------------------
David L Carlson
Department of Anthropology
Texas A&M University
College Station, TX 77840-4352



-----Original Message-----
From: R-help [mailto:r-help-boun...@r-project.org] On Behalf Of Anthoni, Peter (IMK)
Sent: Wednesday, July 27, 2016 6:14 AM
To: r-help@r-project.org
Subject: [R] Aggregate matrix in a 2 by 2 manner

Hi all,

I need to aggregate some matrix data (1440x720) to a lower dimension (720x360) for lots of years and variables.

I can do a double for loop, but that will be slow. Does anybody know a quicker way?

Here is an example with a smaller matrix:

tst=matrix(1:(8*4),ncol=8,nrow=4)
tst_2x2=matrix(NA,ncol=4,nrow=2)
nx=2
ny=2
for(ilon in seq(1,8,nx)) {
  for (ilat in seq(1,4,ny)) {
    ilon_2x2=1+(ilon-1)/nx
    ilat_2x2=1+(ilat-1)/ny
    tst_2x2[ilat_2x2,ilon_2x2] = mean(tst[ilat+0:1,ilon+0:1])
  }
}

tst
tst_2x2

tst
  [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,]    1    5    9   13   17   21   25   29
[2,]    2    6   10   14   18   22   26   30
[3,]    3    7   11   15   19   23   27   31
[4,]    4    8   12   16   20   24   28   32

tst_2x2
  [,1] [,2] [,3] [,4]
[1,]  3.5 11.5 19.5 27.5
[2,]  5.5 13.5 21.5 29.5


I thought a cast to a 3-d array might do the trick, applying over the new dimension, but that does not work, since it casts the data along the rows rather than into 2 by 2 blocks.
matrix(apply(array(tst,dim=c(nx,ny,8)),3,mean),nrow=nrow(tst)/ny)
  [,1] [,2] [,3] [,4]
[1,]  2.5 10.5 18.5 26.5
[2,]  6.5 14.5 22.5 30.5


cheers
Peter


David Winsemius
Alameda, CA, USA




---------------------------------------------------------------------------
Jeff Newmiller                        The     .....       .....  Go Live...
DCN:<jdnew...@dcn.davis.ca.us>        Basics: ##.#.       ##.#.  Live Go...
                                      Live:   OO#.. Dead: OO#..  Playing
Research Engineer (Solar/Batteries            O.O#.       #.O#.  with
/Software/Embedded Controllers)               .OO#.       .OO#.  rocks...1k

