Here is my latest. I kind of changed the problem (for speed). In real life
I have over 300 uadata type matrices, each having over 20 rows and over
11,000 columns. However the rddata file is valid for all of the uadata
matrices that I have (300). What I am doing now: I'm creating a matrix of
row indices which will either lag the row values or not based on the report
data (rddata). Then I apply that matrix of row indices to each uadata data
item (300 times) to create a matrix of the correctly row adjusted data
items for the correct columns of the dimensions and periodicity that I want
(weekly in this case). The key being, I only do the 'adjustment' once
(which is comparatively slow) and I apply those results to the data matrix
(fast!).
I'm open to ideas. I put this together quickly so hopefully all is well.
#########sample data
zdates =
c("2007-03-31","2007-06-30","2007-09-30","2007-12-31","2008-03-31","2008-06-30","2008-09-30","2008-12-31")
nms = c("A","B","C","D")
# these are the report dates that are the real days the data was available
rddata =
matrix(c("20070514","20070814","20071115","20080213","20080514","20080814","20081114","20090217",
"20070410","20070709","20071009","20080109","20080407","20080708","20081007","20090112",
"20070426","--","--","--","--","--","--","20090319",
"--","--","--","--","--","--","--","--"),
nrow=8,ncol=4)
dimnames(rddata) = list(zdates,nms)
# this is the unadjusted raw data, that always has the same dimensions,
rownames, and colnames as the report dates
uadata = matrix(c(640.35,636.16,655.91,657.41,682.06,702.90,736.15,667.65,
2625.050,2625.050,2645.000,2302.000,1972.000,1805.000,1547.000,1025.000,
NaN, NaN,-98.426,190.304,180.894,183.220,172.520, 144.138,
NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN),
nrow=8,ncol=4)
dimnames(uadata) = list(zdates,nms)
#################################################### I do this once
fix = function(x)
{
year = substring(x, 1, 4)
mo = substring(x, 5, 6)
day = substring(x, 7, 8)
ifelse(year=="--", "--", paste(year, mo, day, sep = "-"))
}
rd = apply(rddata, 2, fix)
dimnames(rd) = dimnames(rd)
wd1 <- seq(from =as.Date(min(zdates)), to = Sys.Date(), by = "day")
wd1 = wd1[weekdays(wd1) == "Friday"] # uncomment to go weekly
wd = sapply(wd1, as.character)
mat = matrix(NA,nrow=length(wd),ncol=ncol(uadata))
rownames(mat) = wd
nms = as.Date(rownames(uadata))
for(i in 1:length(wd)){
d = as.Date(wd[i])
diff = abs(nms - d)
rd_row_idx = max(which(diff == min(diff)))
rd_row_idx_lag = rd_row_idx - 1
rd_row_idx_lag2 = rd_row_idx - 2
rd_col_idx = which(as.Date(rd[rd_row_idx,], format="%Y-%m-%d") < d)
rd_col_idx_lag = which(as.Date(rd[rd_row_idx_lag,], format="%Y-%m-%d") <
d)
rd_col_idx_lag2 = which(as.Date(rd[rd_row_idx_lag2,], format="%Y-%m-%d")
< d)
## if(length(rd_col_idx_lag2) && (rd_row_idx - 2) > 0){
if(rd_row_idx_lag2 > 0){
# mat[i,rd_col_idx_lag2] = ua[rd_row_idx_lag2,rd_col_idx_lag2]
mat[i,rd_col_idx_lag2] = rd_row_idx_lag2
}
#if(length(rd_col_idx_lag)){
mat[i,rd_col_idx_lag] = rd_row_idx_lag
#}
#if( length(rd_col_idx)){
mat[i,rd_col_idx] = rd_row_idx
#}
}
indx = mat
vals = uadata
########################## I do this 300 times
x =
matrix(vals[cbind(c(indx),rep(1:ncol(indx),each=nrow(indx)))],nrow=nrow(indx),ncol=ncol(indx))
Regards,
ben
On Thu, Mar 8, 2012 at 11:40 AM, Rui Barradas <[email protected]> wrote:
> Hello,
>
> > Humm.... If I understand what you are saying, you are correct. I get
> > 144.138 for 2009-03-20 for column C. Maybe I posted the wrong code? If
> > so,
> > sorry.
>
> I think I have the fastest so far solution, and it checks with your
> corrected,last one.
>
> I've made just a change: to transform it into a function I renamed the
> parameters
> (only for use inside the function) 'zdates', without the period, 'rddata'
> and 'uadata'.
>
> 'fun1' is yours, 'fun2', mine. Here it goes.
>
>
> fun1 <- function(zdates, rddata, uadata){
> fix = function(x)
> {
> year = substring(x, 1, 4)
> mo = substring(x, 5, 6)
> day = substring(x, 7, 8)
> ifelse(year=="--", "--", paste(year, mo, day, sep = "-"))
>
> }
> rd = apply(rddata, 2, fix)
> dimnames(rd) = dimnames(rd)
>
> wd1 <- seq(from =as.Date(min(zdates)), to = Sys.Date(), by = "day")
> #wd1 = wd1[weekdays(wd1) == "Friday"] # uncomment to go weekly
> wd = sapply(wd1, as.character)
> mat = matrix(NA,nrow=length(wd),ncol=ncol(uadata))
> rownames(mat) = wd
> nms = as.Date(rownames(uadata))
>
> for(i in 1:length(wd)){
> d = as.Date(wd[i])
> diff = abs(nms - d)
> rd_row_idx = max(which(diff == min(diff)))
> rd_col_idx = which(as.Date(rd[rd_row_idx,], format="%Y-%m-%d") < d)
> rd_col_idx_lag = which(as.Date(rd[rd_row_idx - 1,], format="%Y-%m-%d")
> < d)
> rd_col_idx_lag2 = which(as.Date(rd[rd_row_idx - 2,],
> format="%Y-%m-%d") < d)
>
> if(length(rd_col_idx_lag2) && (rd_row_idx - 2) > 0){
> mat[i,rd_col_idx_lag2] = uadata[rd_row_idx - 2,rd_col_idx_lag2]
> }
> if(length(rd_col_idx_lag)){
> mat[i,rd_col_idx_lag] = uadata[rd_row_idx - 1,rd_col_idx_lag]
> }
> if( length(rd_col_idx)){
> mat[i,rd_col_idx] = uadata[rd_row_idx,rd_col_idx]
> }
> }
> colnames(mat)=colnames(uadata)
> mat
> }
>
> fun2 <- function(zdates, rddata, uadata){
>
> fdate <- function(x, format="%Y%m%d"){
> DF <- data.frame(x)
> for(i in colnames(DF)){
> DF[, i] <- as.Date(DF[, i], format=format)
> class(DF[, i]) <- "Date"
> }
> DF
> }
>
> rddata <- fdate(rddata)
> wd1 <- seq(from = as.Date(zdates[1]), to = Sys.Date(), by = "day")
> nwd1 <- length(wd1)
>
> fin1 <- matrix(NA, nrow=length(wd1), ncol=ncol(uadata))
> nr <- nrow(rddata)
> xstart <- c(integer(nr), nwd1)
> for(j in 1:ncol(uadata)){
> x <- xstart
> for(i in 1:nr)
> x[i] <- if(!is.na(rddata[i, j]) &
> !is.nan(rddata[i, j]))
> which(wd1 == rddata[i, j])
> else NA
> ix <- which(!is.na(x))
> for(i in seq_len(length(ix) - 1)){
> from <- x[ ix[i] ] + 1
> to <- x[ ix[i + 1] ]
> fin1[ from:to, j ] <- uadata[ ix[i], j ]
> }
> }
> colnames(fin1) <- colnames(uadata)
> rownames(fin1) <- as.character(wd1)
> fin1
> }
>
> t1 <- system.time(m1 <- fun1(z.dates, rd1, ua))
> t2 <- system.time(m2 <- fun2(z.dates, rd1, ua))
>
> all.equal(m1, m2)
> [1] TRUE
>
> rbind(t1, t2)
> user.self sys.self elapsed user.child sys.child
> t1 1.50 0 1.50 NA NA
> t2 0.02 0 0.01 NA NA
>
> And the better news is that I believe it scales up without degrading
> performance,
> like my first did.
>
> See if it works.
>
> Rui Barradas
>
>
>
> --
> View this message in context:
> http://r.789695.n4.nabble.com/index-instead-of-loop-tp4447672p4457290.html
> Sent from the R help mailing list archive at Nabble.com.
>
> ______________________________________________
> [email protected] mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
> http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>
[[alternative HTML version deleted]]
______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.