Hi Justin: I'm not dead certain this is what you were after, but try this:
library(plyr)  # provides mdply() and ddply(), both used below

## Example data: 5 ids x 200 rows each, a 3-level state variable
## and two numeric columns to summarise.
dat <- data.frame(
  id = rep(1:5, each = 200),
  state = sample(1:3, 1000, replace = TRUE, prob = c(0.7, 0.05, 0.25)),
  V1 = runif(1000, 1, 10),
  V2 = rnorm(1000)
)

## input a data frame, output a data frame...
##
## d: data frame with columns state, V1 and V2 (one id's worth of rows
##    when called through ddply() below).
## Returns one row per run of consecutive identical 'state' values, with
## columns index, state, from, to, v1mean and v2sum.
loopFun <- function(d) {
  rle.dat <- rle(d$state)

  ## last and first row number of each run
  to <- cumsum(rle.dat$lengths)
  from <- c(1, 1 + to[-length(to)])

  ## data frame of input parameters for mdply() below
  subs <- data.frame(from = from, to = to)

  ## compute mean & sum from data subset
  ## between from and to
  foo <- function(from, to) {
    subd <- d[from:to, ]
    data.frame(v1mean = mean(subd$V1), v2sum = sum(subd$V2))
  }

  ## mdply() calls foo() once per row of subs and keeps from/to in the result
  u <- mdply(subs, foo)
  data.frame(index = seq_along(from), state = rle.dat$values, u)
}

w <- ddply(dat, .(id), loopFun)

My result looks like:

> head(w)
  id index state from to   v1mean      v2sum
1  1     1     2    1  1 6.077463 -0.9980917
2  1     2     3    2  2 5.013528 -0.4693002
3  1     3     1    3 10 6.066698  3.3607026
4  1     4     3   11 11 3.235843  0.8072452
5  1     5     1   12 13 3.385865  1.5167562
6  1     6     3   14 14 4.236730 -1.4976861
> tail(w)
    id index state from  to   v1mean      v2sum
430  5    85     3  177 178 7.327400  0.4125103
431  5    86     1  179 185 5.084396 -0.3874377
432  5    87     2  186 187 2.558208 -1.2045609
433  5    88     1  188 192 6.180575  2.2682108
434  5    89     3  193 193 5.606389  1.2107051
435  5    90     1  194 200 5.307754 -0.8947832

HTH,
Dennis

On Fri, Jun 17, 2011 at 3:55 PM, Justin Haynes <jto...@gmail.com> wrote:
> I think need to do something like this:
>
> dat<-data.frame(state=sample(id=rep(1:5,each=200),1:3, 1000,
> replace=T,prob=c(0.7,0.05,0.25)),V1=runif(1,10,1000),V2=rnorm(1000))
> rle.dat<-rle(dat$state)
> temp<-1
> out<-data.frame(id=1:length(rle.dat$length))
> for(i in 1:length(rle.dat$length)){
> temp2<-temp+rle.dat$length[[i]]
> out$V1[i]<-mean(dat$V1[temp:temp2])
> out$V2[i]<-sum(dat$V2[temp:temp2])
> out$state[i]<-rle.dat$value[[i]]
> temp<-temp2
> }
>
> to a very large dataset. I want to apply a few summary functions to
> some variables within a data.frame for given states.
To complicate
> things, I'd like to use plyr and split on the id variable before I do
> any of this...
>
> loop.func<-function(dat){
> rle.dat<-rle(dat$state)
> temp<-1
> out<-data.frame(id=1:length(rle.dat$length))
> for(i in 1:length(rle.dat$length)){
> temp2<-temp+rle.dat$length[[i]]
> out$V1[i]<-mean(dat$V1[temp:temp2])
> out$V2[i]<-sum(dat$V2[temp:temp2])
> out$state[i]<-rle.dat$value[[i]]
> temp<-temp2
> }
> return(out)
> }
> out<-ddply(dat,.(id),loop.func)
>
> mostly, I just don't understand how to use a list (especially in this
> instance) in a plyr/apply statement...
>
>
> Thanks,
>
> Justin
>
> ______________________________________________
> R-help@r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>
______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.