Hi Bill, It is much simpler. # with aggregate() and merge() res1<-with(dat2,aggregate(seq_len(nrow(dat2)),by=list(idr=idr),FUN=function(i) with(dat2[i,], any(schyear<=5 & year ==0)))) res2<-merge(dat2,res1,by="idr") colnames(res2)[4]<-"flag" within(res2,{flag<-as.integer(flag)}) #idr schyear year flag #1 1 4 -1 1 #2 1 5 0 1 #3 1 6 1 1 #4 1 7 2 1 #5 2 9 0 0 #6 2 10 1 0 #7 2 11 2 0
A.K. ----- Original Message ----- From: William Dunlap <wdun...@tibco.com> To: arun <smartpink...@yahoo.com>; Christopher Desjardins <cddesjard...@gmail.com> Cc: R help <r-help@r-project.org> Sent: Saturday, November 3, 2012 9:21 PM Subject: RE: [R] Replacing NAs in long format Or, even simpler, > flag <- with(dat2, ave(schyear<=5 & year==0, idr, FUN=any)) > data.frame(dat2, flag) idr schyear year flag 1 1 4 -1 TRUE 2 1 5 0 TRUE 3 1 6 1 TRUE 4 1 7 2 TRUE 5 2 9 0 FALSE 6 2 10 1 FALSE 7 2 11 2 FALSE Bill Dunlap Spotfire, TIBCO Software wdunlap tibco.com > -----Original Message----- > From: r-help-boun...@r-project.org [mailto:r-help-boun...@r-project.org] On > Behalf > Of William Dunlap > Sent: Saturday, November 03, 2012 5:38 PM > To: arun; Christopher Desjardins > Cc: R help > Subject: Re: [R] Replacing NAs in long format > > ave() or split<-() can make that easier to write, although it > may take some time to internalize the idiom. E.g., > > > flag <- rep(NA, nrow(dat2)) # add as.integer if you prefer 1,0 over >TRUE,FALSE > > split(flag, dat2$idr) <- lapply(split(dat2, dat2$idr), function(d)with(d, >any(schyear<=5 & > year==0))) > > data.frame(dat2, flag) > idr schyear year flag > 1 1 4 -1 TRUE > 2 1 5 0 TRUE > 3 1 6 1 TRUE > 4 1 7 2 TRUE > 5 2 9 0 FALSE > 6 2 10 1 FALSE > 7 2 11 2 FALSE > or > > ave(seq_len(nrow(dat2)), dat2$idr, FUN=function(i)with(dat2[i,], >any(schyear<=5 & > year==0))) > [1] 1 1 1 1 0 0 0 > > flag <- ave(seq_len(nrow(dat2)), dat2$idr, FUN=function(i)with(dat2[i,], > any(schyear<=5 & year==0))) > > data.frame(dat2, flag) > idr schyear year flag > 1 1 4 -1 1 > 2 1 5 0 1 > 3 1 6 1 1 > 4 1 7 2 1 > 5 2 9 0 0 > 6 2 10 1 0 > 7 2 11 2 0 > > Bill Dunlap > Spotfire, TIBCO Software > wdunlap tibco.com > > > > -----Original Message----- > > From: r-help-boun...@r-project.org [mailto:r-help-boun...@r-project.org] On > > Behalf > > Of arun > > Sent: Saturday, November 03, 2012 5:01 PM > > To: Christopher Desjardins > > Cc: R help > > Subject: Re: [R] 
Replacing NAs in long format > > > > Hi, > > May be this helps: > > dat2<-read.table(text=" > > idr schyear year > > 1 4 -1 > > 1 5 0 > > 1 6 1 > > 1 7 2 > > 2 9 0 > > 2 10 1 > > 2 11 2 > > ",sep="",header=TRUE) > > > > dat2$flag<-unlist(lapply(split(dat2,dat2$idr),function(x) > > rep(ifelse(any(apply(x,1,function(x) x[2]<=5 & > x[3]==0)),1,0),nrow(x))),use.names=FALSE) > > dat2 > > # idr schyear year flag > > #1 1 4 -1 1 > > #2 1 5 0 1 > > #3 1 6 1 1 > > #4 1 7 2 1 > > #5 2 9 0 0 > > #6 2 10 1 0 > > #7 2 11 2 0 > > A.K. > > > > > > > > > > ----- Original Message ----- > > From: Christopher Desjardins <cddesjard...@gmail.com> > > To: jim holtman <jholt...@gmail.com> > > Cc: r-help@r-project.org > > Sent: Saturday, November 3, 2012 7:09 PM > > Subject: Re: [R] Replacing NAs in long format > > > > I have a similar sort of follow up and I bet I could reuse some of this > > code but I'm not sure how. > > > > Let's say I want to create a flag that will be equal to 1 if schyear < = 5 > > and year = 0 for a given idr. For example > > > > > dat > > > > idr schyear year > > 1 4 -1 > > 1 5 0 > > 1 6 1 > > 1 7 2 > > 2 9 0 > > 2 10 1 > > 2 11 2 > > > > How could I make the data look like this? > > > > idr schyear year flag > > 1 4 -1 1 > > 1 5 0 1 > > 1 6 1 1 > > 1 7 2 1 > > 2 9 0 0 > > 2 10 1 0 > > 2 11 2 0 > > > > > > I am not sure how to end up not getting both 0s and 1s for the 'flag' > > variable for an idr. For example, > > > > dat$flag = ifelse(schyear <= 5 & year ==0, 1, 0) > > > > Does not work because it will create: > > > > idr schyear year flag > > 1 4 -1 0 > > 1 5 0 1 > > 1 6 1 0 > > 1 7 2 0 > > 2 9 0 0 > > 2 10 1 0 > > 2 11 2 0 > > > > And thus flag changes for an idr. Which it shouldn't. > > > > Thanks, > > Chris > > > > > > On Sat, Nov 3, 2012 at 5:50 PM, Christopher Desjardins < > > cddesjard...@gmail.com> wrote: > > > > > Hi Jim, > > > Thank you so much. That does exactly what I want. 
> > > Chris > > > > > > > > > On Sat, Nov 3, 2012 at 1:30 PM, jim holtman <jholt...@gmail.com> wrote: > > > > > >> > x <- read.table(text = "idr schyear year > > >> + 1 8 0 > > >> + 1 9 1 > > >> + 1 10 NA > > >> + 2 4 NA > > >> + 2 5 -1 > > >> + 2 6 0 > > >> + 2 7 1 > > >> + 2 8 2 > > >> + 2 9 3 > > >> + 2 10 4 > > >> + 2 11 NA > > >> + 2 12 6 > > >> + 3 4 NA > > >> + 3 5 -2 > > >> + 3 6 -1 > > >> + 3 7 0 > > >> + 3 8 1 > > >> + 3 9 2 > > >> + 3 10 3 > > >> + 3 11 NA", header = TRUE) > > >> > # you did not specify if there might be multiple contiguous NAs, > > >> > # so there are a lot of checks to be made > > >> > x.l <- lapply(split(x, x$idr), function(.idr){ > > >> + # check for all NAs -- just return indeterminate state > > >> + if (sum(is.na(.idr$year)) == nrow(.idr)) return(.idr) > > >> + # repeat until all NAs have been fixed; takes care of contiguous > > >> ones > > >> + while (any(is.na(.idr$year))){ > > >> + # find all the NAs > > >> + for (i in which(is.na(.idr$year))){ > > >> + if ((i == 1L) && (!is.na(.idr$year[i + 1L]))){ > > >> + .idr$year[i] <- .idr$year[i + 1L] - 1 > > >> + } else if ((i > 1L) && (!is.na(.idr$year[i - 1L]))){ > > >> + .idr$year[i] <- .idr$year[i - 1L] + 1 > > >> + } else if ((i < nrow(.idr)) && (!is.na(.idr$year[i + > > >> 1L]))){ > > >> + .idr$year[i] <- .idr$year[i + 1L] -1 > > >> + } > > >> + } > > >> + } > > >> + return(.idr) > > >> + }) > > >> > do.call(rbind, x.l) > > >> idr schyear year > > >> 1.1 1 8 0 > > >> 1.2 1 9 1 > > >> 1.3 1 10 2 > > >> 2.4 2 4 -2 > > >> 2.5 2 5 -1 > > >> 2.6 2 6 0 > > >> 2.7 2 7 1 > > >> 2.8 2 8 2 > > >> 2.9 2 9 3 > > >> 2.10 2 10 4 > > >> 2.11 2 11 5 > > >> 2.12 2 12 6 > > >> 3.13 3 4 -3 > > >> 3.14 3 5 -2 > > >> 3.15 3 6 -1 > > >> 3.16 3 7 0 > > >> 3.17 3 8 1 > > >> 3.18 3 9 2 > > >> 3.19 3 10 3 > > >> 3.20 3 11 4 > > >> > > > >> > > > >> > > >> > > >> On Sat, Nov 3, 2012 at 1:14 PM, Christopher Desjardins > > >> <cddesjard...@gmail.com> wrote: > > >> > Hi, > > >> > I have the following data: > > 
>> > > > >> >> data[1:20,c(1,2,20)] > > >> > idr schyear year > > >> > 1 8 0 > > >> > 1 9 1 > > >> > 1 10 NA > > >> > 2 4 NA > > >> > 2 5 -1 > > >> > 2 6 0 > > >> > 2 7 1 > > >> > 2 8 2 > > >> > 2 9 3 > > >> > 2 10 4 > > >> > 2 11 NA > > >> > 2 12 6 > > >> > 3 4 NA > > >> > 3 5 -2 > > >> > 3 6 -1 > > >> > 3 7 0 > > >> > 3 8 1 > > >> > 3 9 2 > > >> > 3 10 3 > > >> > 3 11 NA > > >> > > > >> > What I want to do is replace the NAs in the year variable with the > > >> > following: > > >> > > > >> > idr schyear year > > >> > 1 8 0 > > >> > 1 9 1 > > >> > 1 10 2 > > >> > 2 4 -2 > > >> > 2 5 -1 > > >> > 2 6 0 > > >> > 2 7 1 > > >> > 2 8 2 > > >> > 2 9 3 > > >> > 2 10 4 > > >> > 2 11 5 > > >> > 2 12 6 > > >> > 3 4 -3 > > >> > 3 5 -2 > > >> > 3 6 -1 > > >> > 3 7 0 > > >> > 3 8 1 > > >> > 3 9 2 > > >> > 3 10 3 > > >> > 3 11 4 > > >> > > > >> > I have no idea how to do this. What it needs to do is make sure that > > >> > for > > >> > each subject (idr) that it either adds a 1 if it is preceded by a value > > >> in > > >> > year or subtracts a 1 if it comes before a year value. > > >> > > > >> > Does that make sense? I could do this in Excel but I am at a loss for > > >> how > > >> > to do this in R. Please reply to me as well as the list if you respond. > > >> > > > >> > Thanks! > > >> > Chris > > >> > > > >> > [[alternative HTML version deleted]] > > >> > > > >> > ______________________________________________ > > >> > R-help@r-project.org mailing list > > >> > https://stat.ethz.ch/mailman/listinfo/r-help > > >> > PLEASE do read the posting guide > > >> http://www.R-project.org/posting-guide.html > > >> > and provide commented, minimal, self-contained, reproducible code. > > >> > > >> > > >> > > >> -- > > >> Jim Holtman > > >> Data Munger Guru > > >> > > >> What is the problem that you are trying to solve? > > >> Tell me what you want to do, not how you want to do it. 
> > >> > > > > > > > > > > [[alternative HTML version deleted]] > > > > ______________________________________________ > > R-help@r-project.org mailing list > > https://stat.ethz.ch/mailman/listinfo/r-help > > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > > and provide commented, minimal, self-contained, reproducible code. > > > > > > ______________________________________________ > > R-help@r-project.org mailing list > > https://stat.ethz.ch/mailman/listinfo/r-help > > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > > and provide commented, minimal, self-contained, reproducible code. > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.