Not sure this approach yields meaningful data, but as a demonstration of vectorization I got a factor of 10 speedup.

sine.approx3 <- function( tmin, tmax ) {
  B <- (2*pi)/24  # period = 24 hours
  C <- pi/2 # horizontal shift
  tmin <- t( tmin )
  tmax <- t( tmax )
  idx <- seq.int(  24 * 4 * nrow( tmax ) - 12 * 4 )
  xout <- ( idx - 1 ) * 0.25
  cycles <- sin( B * xout - C )
  mag <- matrix( NA, ncol=ncol( tmax ), nrow=length( idx ) )
  idxup <- 2 * seq.int( nrow( tmax ) ) - 1
  idxdn <- idxup[ -1 ] - 1
  idxups <- rep( seq.int( nrow( tmax ) ), each=48 )
  idxdns <- rep( seq.int( nrow( tmax ) - 1 ), each=48 )
  idxupm <- c( outer( 1:48, 48*( idxup - 1 ), FUN="+" ) )
  idxdnm <- c( outer( 1:48, 48*( idxdn - 1 ), FUN="+" ) )
  mag[ idxupm, ] <- ( tmax - tmin )[ idxups,  ] / 2
  mag[ idxdnm, ] <- ( tmax[ -nrow( tmax ), ]
                    - tmin[ -1, ] )[ idxdns, ] / 2
  mag <- mag * rep( cycles, times=ncol( mag ) )
  mag[ idxupm, ] <- mag[ idxupm, ] + ( tmax + tmin )[ idxups, ] / 2
  mag[ idxdnm, ] <- mag[ idxdnm, ] + ( tmax[ -nrow( tmax ), ]
                                     + tmin[ -1, ] )[ idxdns, ] / 2
  t( mag )
}


On Tue, 13 May 2014, Ortiz-Bobea, Ariel wrote:

Hello,

I'm trying to fit a sine curve over successive temperature readings (i.e. 
minimum and maximum temperature) over several days and for many locations. The 
code below shows a hypothetical example of 5000 locations with  7 days of 
temperature data. Not very efficient when you have many more locations and days.

The linear interpolation takes 0.7 seconds, and the sine interpolations take 2 
to 4 seconds depending on the approach.

Any ideas on how to speed this up? Thanks in advance.

Ariel

### R Code ######

# 1- Prepare data fake data
 days<- 7
 n <- 5000*days
 tmin <- matrix(rnorm(n, mean=0) , ncol=days, nrow=5000)
 tmax <- matrix(rnorm(n, mean=10), ncol=days, nrow=5000)
 m <- matrix(NA, ncol=days*2, nrow=5000)
 m[,seq(1,ncol(m),2)]  <- tmin
 m[,seq(2,ncol(m)+1,2)]<- tmax
 # check first row
 plot(1:ncol(m), m[1,], type="l")

# 2 -linear interpolation: 0.66 seconds
 xout <- seq(0,ncol(m),0.25/24*2)[-1] # time step = 0.25 hours or 15 minutes
 system.time( m1 <- t(apply(m,1, function(y) approx(x=1:ncol(m), y=y, xout=xout, 
method="linear")$y)) )
 # Check first row
 plot(1:ncol(m), m[1,], type="l")
 points(xout, m1[1,], col="red", cex=1)


# 3- sine interpolation
 sine.approx1 <- function(index, tmin, tmax) {
   b <- (2*pi)/24  # period = 24 hours
   c <- pi/2 # horizontal shift
   xout <- seq(0,24,0.25)[-1]
   yhat <- apply(cbind(tmin[index,],tmax[index,]), 1, function(z) diff(z)/2 * 
sin(b*xout-c) + mean(z))
   #yhat <- yhat[-nrow(yhat),]
   yhat <- c(yhat)
   #plot(yhat, type="l")
 }
 sine.approx2 <- function(index, tmin, tmax) {
   b <- (2*pi)/24  # period = 24 hours
   c <- pi/2 # horizontal shift
   xout1 <- seq(0 ,12,0.25)
   xout2 <- seq(12,24,0.25)[-1]
   xout2 <- xout2[-length(xout2)]
   yhat1 <- apply(cbind(tmin[index,]                       ,tmax[index,]    ), 
1, function(z) diff(z)/2 * sin(b*xout1-c) + mean(z))
   yhat2 <- apply(cbind(tmax[index,][-length(tmax[index,])],tmin[index,][-1]), 
1, function(z) diff(z)/2 * sin(b*xout2+c) + mean(z))
   yhat2 <- cbind(yhat2,NA)
   yhat3 <- rbind(yhat1,yhat2)
   #yhat3 <- yhat3[-nrow(yhat3),]
   yhat3 <- c(yhat3)
   yhat <- yhat3
   #plot(c(yhat1))
   #plot(c(yhat2))
   #plot(yhat, type="l")
 }

 # Single sine: 2.23 seconds
 system.time( m2 <- t(sapply(1:nrow(m), function(i) sine.approx1(i, tmin=tmin, 
tmax=tmax))) )

 # Double sine: 4.03 seconds
 system.time( m3 <- t(sapply(1:nrow(m), function(i) sine.approx2(i, tmin=tmin, 
tmax=tmax))) )

 # take a look at approach 1
 plot(seq(-1,ncol(m)-1,1)[-1], m[1,], type="l")
 points(xout, m2[1,], col="red", cex=1)

 # take a look at approach 2
 plot(seq(-1,ncol(m)-1,1)[-1], m[1,], type="l")
 points(xout, m3[1,], col="blue", cex=1)


---
Ariel Ortiz-Bobea
Fellow
Resources for the Future
1616 P Street, N.W.
Washington, DC 20036
202-328-5173


        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


---------------------------------------------------------------------------
Jeff Newmiller                        The     .....       .....  Go Live...
DCN:<jdnew...@dcn.davis.ca.us>        Basics: ##.#.       ##.#.  Live Go...
                                      Live:   OO#.. Dead: OO#..  Playing
Research Engineer (Solar/Batteries            O.O#.       #.O#.  with
/Software/Embedded Controllers)               .OO#.       .OO#.  rocks...1k

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to