Hi Jeff, many thanks, that one is the Speedy Gonzalles out of all. Can also do some FUN stuff.
aggregate.nx.ny.array.aperm <- function( dta, nx = 2, ny = 2, FUN=colMeans, ... ) { # number of rows in result nnr <- nrow( dta ) %/% ny # number of columns in result nnc <- ncol( dta ) %/% nx # number of values to take mean of nxny <- nx * ny # describe existing layout of values in dta as 4-d array a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) ) # swap data in dimensions 2 and 3 a2 <- aperm( a1, c( 1, 3, 2, 4 ) ) # treat first two dimensions as column vectors, remaining as columns a3 <- matrix( a2, nrow = nxny ) # fast calculation of column means v <- FUN( a3, ... ) # reframe result vector as a matrix matrix( v, ncol = nnc ) } > system.time( {for(i in 1:10) > tst_2x2=aggregate.nx.ny.forloop(tst,2,2,mean,na.rm=T)}) user system elapsed 14.003 0.271 14.663 > system.time( {for(i in 1:10) > tst_2x2=aggregate.nx.ny.interaction(tst,2,2,mean,na.rm=T)}) user system elapsed 32.686 1.175 35.012 > system.time( {for(i in 1:10) > tst_2x2=aggregate.nx.ny.expand.grid(tst,2,2,mean,na.rm=T)}) user system elapsed 9.590 0.197 9.951 > system.time( {for(i in 1:10) > tst_2x2=aggregate.nx.ny.array.apply(tst,2,2,mean,na.rm=T)}) user system elapsed 8.391 0.174 8.737 > system.time( {for(i in 1:10) > tst_2x2=aggregate.nx.ny.array.aperm(tst,2,2,colMeans,na.rm=T)}) user system elapsed 0.195 0.019 0.216 > system.time( {for(i in 1:10) > tst_2x2=aggregate.nx.ny.array.aperm(tst,2,2,colSums,na.rm=T)}) user system elapsed 0.169 0.017 0.188 > aggregate.nx.ny.array.aperm(tst.small,FUN=colMeans) [,1] [,2] [,3] [,4] [1,] 3.5 11.5 19.5 27.5 [2,] 5.5 13.5 21.5 29.5 > aggregate.nx.ny.array.aperm(tst.small,FUN=colSums) [,1] [,2] [,3] [,4] [1,] 14 46 78 110 [2,] 22 54 86 118 > aggregate.nx.ny.forloop(tst.small,FUN=sum) [,1] [,2] [,3] [,4] [1,] 14 46 78 110 [2,] 22 54 86 118 cheers Peter > On 31 Jul 2016, at 08:13, Jeff Newmiller <jdnew...@dcn.davis.ca.us> wrote: > > If you don't need all that FUN flexibility, you can get this done way faster > with the aperm and colMeans functions: > > tst <- matrix( seq.int( 1440 * 360 ) > , ncol = 1440 > , nrow = 360 > ) > tst.small <- matrix( seq.int( 8 * 4 ) > , ncol = 8 > , nrow = 4 > ) > aggregate.nx.ny.expand.grid <- function( dta, nx = 2, ny = 2, FUN = mean, ... > ) > { > ilon <- seq( 1, ncol( dta ), nx ) > ilat <- seq( 1, nrow( dta ), ny ) > cells <- as.matrix( expand.grid( ilat, ilon ) ) > blocks <- apply( cells > , 1 > , function( x ) dta[ x[ 1 ]:( x[ 1 ] + 1 ), x[ 2 ]:( x[ 2 ] + > 1 ) ] ) > block.means <- colMeans( blocks ) > matrix( block.means > , nrow( dta ) / ny > , ncol( dta ) / nx > ) > } > > aggregate.nx.ny.array.apply <- function( dta, nx = 2, ny = 2, FUN = mean, ... > ) { > a <- array( dta > , dim = c( ny > , nrow( dta ) %/% ny > , nx > , ncol( dta ) %/% nx > ) > ) > apply( a, c( 2, 4 ), FUN, ... ) > } > > aggregate.nx.ny.array.aperm.mean <- function( dta, nx = 2, ny = 2, ... ) { > # number of rows in result > nnr <- nrow( dta ) %/% ny > # number of columns in result > nnc <- ncol( dta ) %/% nx > # number of values to take mean of > nxny <- nx * ny > # describe existing layout of values in dta as 4-d array > a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) ) > # swap data in dimensions 2 and 3 > a2 <- aperm( a1, c( 1, 3, 2, 4 ) ) > # treat first two dimensions as column vectors, remaining as columns > a3 <- matrix( a2, nrow = nxny ) > # fast calculation of column means > v <- colMeans( a3, ... ) > # reframe result vector as a matrix > matrix( v, ncol = nnc ) > } > > aggregate.nx.ny.array.aperm.apply <- function( dta, nx = 2, ny = 2, FUN = > mean, ... ) { > # number of rows in result > nnr <- nrow( dta ) %/% ny > # number of columns in result > nnc <- ncol( dta ) %/% nx > # number of values to apply FUN to > nxny <- nx * ny > # describe existing layout of values in dta as 4-d array > a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) ) > # swap data in dimensions 2 and 3 > a2 <- aperm( a1, c( 1, 3, 2, 4 ) ) > # treat first two dimensions as column vectors, remaining as columns > a3 <- matrix( a2, nrow = nxny ) > # apply FUN to column vectors > v <- apply( a3, 2, FUN = FUN, ... ) > matrix( v, ncol = nnc ) > } > test1 <- aggregate.nx.ny.expand.grid( tst ) > test2 <- aggregate.nx.ny.array.apply( tst ) > test3 <- aggregate.nx.ny.array.aperm.mean( tst ) > test4 <- aggregate.nx.ny.array.aperm.apply( tst ) > library(microbenchmark) > microbenchmark( > aggregate.nx.ny.expand.grid( tst, 2, 2, mean, na.rm = TRUE ) > , aggregate.nx.ny.array.apply( tst, 2, 2, mean, na.rm = TRUE ) > , aggregate.nx.ny.array.aperm.mean( tst, 2, 2, na.rm = TRUE ) > , aggregate.nx.ny.array.aperm.apply( tst, 2, 2, mean, na.rm = TRUE ) > ) > #Unit: milliseconds > # expr min > # aggregate.nx.ny.expand.grid(tst, 2, 2, mean, na.rm = TRUE) 628.528322 > # aggregate.nx.ny.array.apply(tst, 2, 2, mean, na.rm = TRUE) 846.883314 > # aggregate.nx.ny.array.aperm.mean(tst, 2, 2, na.rm = TRUE) 8.904369 > # aggregate.nx.ny.array.aperm.apply(tst, 2, 2, mean, na.rm = TRUE) 619.691851 > # lq mean median uq max neval cld > # 675.470967 916.39630 778.54090 873.9754 2452.695 100 b > # 920.831966 1126.94691 1000.33830 1094.9233 3412.639 100 c > # 9.191747 21.98528 10.30099 15.9169 158.687 100 a > # 733.246331 936.73359 757.58383 844.2016 2824.557 100 b > > > On Sat, 30 Jul 2016, Jeff Newmiller wrote: > >> For the record, the array.apply code can be fixed as below, but then it is >> slower than the expand.grid version. >> >> aggregate.nx.ny.array.apply <- function(dta,nx=2,ny=2, FUN=mean,...) >> { >> a <- array(dta, dim = c(ny, nrow( dta ) %/% ny, nx, ncol( dta ) %/% nx)) >> apply( a, c(2, 4), FUN, ... ) >> } >> >> -- >> Sent from my phone. Please excuse my brevity. >> >> On July 30, 2016 11:06:16 AM PDT, "Anthoni, Peter (IMK)" >> <peter.anth...@kit.edu> wrote: >>> Hi all, >>> >>> thanks for the suggestions, I did some timing tests, see below. >>> Unfortunately the aggregate.nx.ny.array.apply, does not produce the >>> expected result. >>> So the fastest seems to be the aggregate.nx.ny.expand.grid, though the >>> double for loop is not that much slower. >>> >>> many thanks >>> Peter >>> >>>> tst=matrix(1:(1440*360),ncol=1440,nrow=360) >>>> system.time( {for(i in 1:10) >>> tst_2x2=aggregate.nx.ny.forloop(tst,2,2,mean,na.rm=T)}) >>> user system elapsed >>> 11.227 0.073 11.371 >>>> system.time( {for(i in 1:10) >>> tst_2x2=aggregate.nx.ny.interaction(tst,2,2,mean,na.rm=T)}) >>> user system elapsed >>> 26.354 0.475 26.880 >>>> system.time( {for(i in 1:10) >>> tst_2x2=aggregate.nx.ny.expand.grid(tst,2,2,mean,na.rm=T)}) >>> user system elapsed >>> 9.683 0.055 9.763 >>>> system.time( {for(i in 1:10) >>> tst_2x2=aggregate.nx.ny.array.apply(tst,2,2,mean,na.rm=T)}) >>> user system elapsed >>> 7.693 0.055 7.800 >>> >>>> tst.small=matrix(1:(8*4),ncol=8,nrow=4) >>>> aggregate.nx.ny.forloop = function(data,nx=2,ny=2, FUN=mean,...) >>> + { >>> + nlon=nrow(data) >>> + nlat=ncol(data) >>> + newdata=matrix(NA,nrow=nlon/nx,ncol=nlat/ny) >>> + dim(newdata) >>> + for(ilon in seq(1,nlon,nx)) { >>> + for(ilat in seq(1,nlat,ny)) { >>> + ilon_new=1+(ilon-1)/nx >>> + ilat_new=1+(ilat-1)/ny >>> + newdata[ilon_new,ilat_new] = FUN(data[ilon+0:1,ilat+0:1],...) >>> + } >>> + } >>> + newdata >>> + } >>>> aggregate.nx.ny.forloop(tst.small) >>> [,1] [,2] [,3] [,4] >>> [1,] 3.5 11.5 19.5 27.5 >>> [2,] 5.5 13.5 21.5 29.5 >>>> >>>> aggregate.nx.ny.interaction = function(data,nx=2,ny=2, FUN=mean,...) >>> + { >>> + >>> + nlon=nrow(data) >>> + nlat=ncol(data) >>> + newdata=matrix(NA,nrow=nlon/nx,ncol=nlat/ny) >>> + newdata[] <- tapply( data, interaction( (row(data)+1) %/% 2, >>> (col(data)+1) %/% 2 ), FUN, ...) >>> + newdata >>> + } >>>> aggregate.nx.ny.interaction(tst.small) >>> [,1] [,2] [,3] [,4] >>> [1,] 3.5 11.5 19.5 27.5 >>> [2,] 5.5 13.5 21.5 29.5 >>>> >>>> aggregate.nx.ny.expand.grid = function(data,nx=2,ny=2, FUN=mean,...) >>> + { >>> + ilon <- seq(1,ncol(data),nx) >>> + ilat <- seq(1,nrow(data),ny) >>> + cells <- as.matrix(expand.grid(ilat, ilon)) >>> + blocks <- apply(cells, 1, function(x) >>> data[x[1]:(x[1]+1),x[2]:(x[2]+1)]) >>> + block.means <- colMeans(blocks) >>> + matrix(block.means, nrow(data)/ny, ncol(data)/nx) >>> + } >>>> aggregate.nx.ny.expand.grid(tst.small) >>> [,1] [,2] [,3] [,4] >>> [1,] 3.5 11.5 19.5 27.5 >>> [2,] 5.5 13.5 21.5 29.5 >>>> >>>> aggregate.nx.ny.array.apply = function(data,nx=2,ny=2, FUN=mean,...) >>> { >>> + a <- array(data, dim = c(ny, nrow( data ) %/% ny, ncol( data ) %/% >>> nx)) >>> + apply( a, c(2, 3), FUN, ... ) >>> + } >>>> aggregate.nx.ny.array.apply(tst.small) >>> [,1] [,2] [,3] [,4] >>> [1,] 1.5 5.5 9.5 13.5 >>> [2,] 3.5 7.5 11.5 15.5 >>> >>> >>> >>>> On 28 Jul 2016, at 00:26, David Winsemius <dwinsem...@comcast.net> >>> wrote: >>>> >>>> >>>>> On Jul 27, 2016, at 12:02 PM, Jeff Newmiller >>> <jdnew...@dcn.davis.ca.us> wrote: >>>>> >>>>> An alternative (more compact, not necessarily faster, because apply >>> is still a for loop inside): >>>>> >>>>> f <- function( m, nx, ny ) { >>>>> # redefine the dimensions of my >>>>> a <- array( m >>>>> , dim = c( ny >>>>> , nrow( m ) %/% ny >>>>> , ncol( m ) %/% nx ) >>>>> ) >>>>> # apply mean over dim 1 >>>>> apply( a, c( 2, 3 ), FUN=mean ) >>>>> } >>>>> f( tst, nx, ny ) >>>> >>>> Here's an apparently loopless strategy, although I suspect the code >>> for interaction (and maybe tapply as well?) uses a loop. >>>> >>>> >>>> tst_2X2 <- matrix(NA, ,ncol=4,nrow=2) >>>> >>>> tst_2x2[] <- tapply( tst, interaction( (row(tst)+1) %/% 2, >>> (col(tst)+1) %/% 2 ), mean) >>>> >>>> tst_2x2 >>>> >>>> [,1] [,2] [,3] [,4] >>>> [1,] 3.5 11.5 19.5 27.5 >>>> [2,] 5.5 13.5 21.5 29.5 >>>> >>>> -- >>>> David. >>>> >>>> >>>>> >>>>> -- >>>>> Sent from my phone. Please excuse my brevity. >>>>> >>>>> On July 27, 2016 9:08:32 AM PDT, David L Carlson <dcarl...@tamu.edu> >>> wrote: >>>>>> This should be faster. It uses apply() across the blocks. >>>>>> >>>>>>> ilon <- seq(1,8,nx) >>>>>>> ilat <- seq(1,4,ny) >>>>>>> cells <- as.matrix(expand.grid(ilat, ilon)) >>>>>>> blocks <- apply(cells, 1, function(x) tst[x[1]:(x[1]+1), >>>>>> x[2]:(x[2]+1)]) >>>>>>> block.means <- colMeans(blocks) >>>>>>> tst_2x2 <- matrix(block.means, 2, 4) >>>>>>> tst_2x2 >>>>>> [,1] [,2] [,3] [,4] >>>>>> [1,] 3.5 11.5 19.5 27.5 >>>>>> [2,] 5.5 13.5 21.5 29.5 >>>>>> >>>>>> ------------------------------------- >>>>>> David L Carlson >>>>>> Department of Anthropology >>>>>> Texas A&M University >>>>>> College Station, TX 77840-4352 >>>>>> >>>>>> >>>>>> >>>>>> -----Original Message----- >>>>>> From: R-help [mailto:r-help-boun...@r-poject.org] On Behalf Of >>> Anthoni, >>>>>> Peter (IMK) >>>>>> Sent: Wednesday, July 27, 2016 6:14 AM >>>>>> To: r-help@r-project.org >>>>>> Subject: [R] Aggregate matrix in a 2 by 2 manor >>>>>> >>>>>> Hi all, >>>>>> >>>>>> I need to aggregate some matrix data (1440x720) to a lower >>> dimension >>>>>> (720x360) for lots of years and variables >>>>>> >>>>>> I can do double for loop, but that will be slow. Anybody know a >>> quicker >>>>>> way? >>>>>> >>>>>> here an example with a smaller matrix size: >>>>>> >>>>>> tst=matrix(1:(8*4),ncol=8,nrow=4) >>>>>> tst_2x2=matrix(NA,ncol=4,nrow=2) >>>>>> nx=2 >>>>>> ny=2 >>>>>> for(ilon in seq(1,8,nx)) { >>>>>> for (ilat in seq(1,4,ny)) { >>>>>> ilon_2x2=1+(ilon-1)/nx >>>>>> ilat_2x2=1+(ilat-1)/ny >>>>>> tst_2x2[ilat_2x2,ilon_2x2] = mean(tst[ilat+0:1,ilon+0:1]) >>>>>> } >>>>>> } >>>>>> >>>>>> tst >>>>>> tst_2x2 >>>>>> >>>>>>> tst >>>>>> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] >>>>>> [1,] 1 5 9 13 17 21 25 29 >>>>>> [2,] 2 6 10 14 18 22 26 30 >>>>>> [3,] 3 7 11 15 19 23 27 31 >>>>>> [4,] 4 8 12 16 20 24 28 32 >>>>>> >>>>>>> tst_2x2 >>>>>> [,1] [,2] [,3] [,4] >>>>>> [1,] 3.5 11.5 19.5 27.5 >>>>>> [2,] 5.5 13.5 21.5 29.5 >>>>>> >>>>>> >>>>>> I though a cast to 3d-array might do the trick and apply over the >>> new >>>>>> dimension, but that does not work, since it casts the data along >>> the >>>>>> row. >>>>>>> matrix(apply(array(tst,dim=c(nx,ny,8)),3,mean),nrow=nrow(tst)/ny) >>>>>> [,1] [,2] [,3] [,4] >>>>>> [1,] 2.5 10.5 18.5 26.5 >>>>>> [2,] 6.5 14.5 22.5 30.5 >>>>>> >>>>>> >>>>>> cheers >>>>>> Peter >>>>>> >>>>>> ______________________________________________ >>>>>> R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see >>>>>> https://stat.ethz.ch/mailman/listinfo/r-help >>>>>> PLEASE do read the posting guide >>>>>> http://www.R-project.org/posting-guide.html >>>>>> and provide commented, minimal, self-contained, reproducible code. >>>>>> >>>>>> ______________________________________________ >>>>>> R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see >>>>>> https://stat.ethz.ch/mailman/listinfo/r-help >>>>>> PLEASE do read the posting guide >>>>>> http://www.R-project.org/posting-guide.html >>>>>> and provide commented, minimal, self-contained, reproducible code. >>>>> >>>>> ______________________________________________ >>>>> R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see >>>>> https://stat.ethz.ch/mailman/listinfo/r-help >>>>> PLEASE do read the posting guide >>> http://www.R-project.org/posting-guide.html >>>>> and provide commented, minimal, self-contained, reproducible code. >>>> >>>> David Winsemius >>>> Alameda, CA, USA >>>> >> >> ______________________________________________ >> R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see >> https://stat.ethz.ch/mailman/listinfo/r-help >> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html >> and provide commented, minimal, self-contained, reproducible code. >> > > --------------------------------------------------------------------------- > Jeff Newmiller The ..... ..... Go Live... > DCN:<jdnew...@dcn.davis.ca.us> Basics: ##.#. ##.#. Live Go... > Live: OO#.. Dead: OO#.. Playing > Research Engineer (Solar/Batteries O.O#. #.O#. with > /Software/Embedded Controllers) .OO#. .OO#. rocks...1k > --------------------------------------------------------------------------- ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.