Thank you so much. You are the BEST:-) Now, I'm gonna learn all these codes and understand.
________________________________ Cc: R help <r-help@r-project.org> Sent: Saturday, April 13, 2013 5:18 PM Subject: Re: Comparison of Date format Hi, DataA<- structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), Status = c("A", "B", "A", "B", "A", "B", "A", "A", "B", "B", "A", "A", "A", "A", "B", "A", "B", "A", "A", "A", "B", "A", "B", "A", "B", "A", "B", "A", "A", "B", "A", "B", "A", "B", "A", "B", "A", "A", "B", "A", "B", "A", "B", "A", "B", "A" ), Date1 = c("03-Feb-01", "15-May-01", "16-May-01", "06-Sep-01", "13-Sep-01", "26-Feb-04", "08-Mar-04", "09-Feb-01", "24-May-01", "25-May-01", "06-Mar-02", "18-Mar-02", "20-Mar-02", "01-Apr-01", "05-May-01", "08-Jul-01", "09-Feb-01", "13-Jun-01", "14-Jun-01", "19-Jun-01", "05-Jul-01", "29-Oct-01", "13-Nov-01", "19-Jan-02", "05-Feb-02", "18-Mar-02", "01-Apr-02", "30-Oct-02", "21-Nov-02", "26-Mar-03", "06-Jun-03", "17-Jun-03", "05-Nov-03", "12-Nov-03", "13-Nov-03", "01-Apr-02", "30-Oct-02", "21-Nov-02", "26-Mar-03", "06-Jun-03", "17-Jun-03", "05-Nov-03", "12-Nov-03", "13-Nov-03", "12-Nov-03", "13-Nov-03"), Date2 = c("15-May-01", "16-May-01", "06-Sep-01", "13-Sep-01", "26-Feb-04", "08-Mar-04", "01-Aug-04", "24-May-01", "25-May-01", "06-Mar-02", "18-Mar-02", "20-Mar-02", "27-Mar-02", "08-May-02", "14-Jun-02", "07-Sep-02", "13-Jun-01", "14-Jun-01", "19-Jun-01", "05-Jul-01", "29-Oct-01", "13-Nov-01", "19-Jan-02", "05-Feb-02", "18-Mar-02", "01-Apr-02", "30-Oct-02", "21-Nov-02", "26-Mar-03", "06-Jun-03", "17-Jun-03", "05-Nov-03", "12-Nov-03", "13-Nov-03", "21-Nov-03", "30-Oct-02", "21-Nov-02", "26-Mar-03", "06-Jun-03", "17-Jun-03", "05-Nov-03", "12-Nov-03", "13-Nov-03", "21-Nov-03", "13-Nov-03", "21-Nov-03")), .Names = c("ID", "Status", "Date1", "Date2"), class = "data.frame", row.names = c(NA, -46L)) DataB<- structure(list(ID = c(1L, 1L, 2L, 8L, 8L, 8L, 8L, 8L), Date.Accident = c("06-Sep-01", "20-Jan-05", "06-Mar-02", "25-Jun-01", "29-Oct-01", "18-Mar-02", "06-Jun-03", "16-Jan-04")), .Names = c("ID", "Date.Accident"), row.names = c(NA, -8L), class = "data.frame") library(stringr) DataB[,2]<- str_trim(DataB[,2]) DataA<- read.csv("FarnooshDataA.csv",sep=",",stringsAsFactors=FALSE) DataA[,3]<- str_trim(DataA[,3]) DataA[,4]<- str_trim(DataA[,4]) dataNew1<-do.call(rbind,lapply(unique(DataA$ID),function(i) {x1<- DataA[DataA$ID==i,]; x2<-DataB[DataB$ID==i,]; do.call(rbind,lapply(seq_len(nrow(x2)),function(i){x3<- unlist(mapply(function(x,y) which(x%in% y),x1[,3:4],x2[i,2])); x3<-if(is.matrix(x3)) x3[1,] else x3; x4<- if(length(x3)==2) x1[x3[which.min(x3)],!names(x1)%in% names(x3[which.max(x3)])] else if(length(x3)==1) x1[x3,c("ID","Status",names(x3))] else NULL; if(length(x4)>0){colnames(x4)[3]<- colnames(DataB)[2];x4} else NULL})) })) DataA1<- DataA DataA1$Date1<-as.Date(DataA1$Date1,format="%d-%b-%y") DataA1$Date2<-as.Date(DataA1$Date2,format="%d-%b-%y") DataB1<- DataB DataB1$Date.Accident<- as.Date(DataB1$Date.Accident,format="%d-%b-%y") dataNew2<-do.call(rbind,lapply(unique(DataA1$ID),function(i) { x1<- DataA1[DataA1$ID==i,] ;x1New<- t(x1); x2<-DataB1[DataB1$ID==i,]; do.call(rbind,lapply(seq_len(nrow(x2)),function(i){x3<-x1[x2[i,2]>x1New[3,] & x2[i,2]< x1New[4,] ,]; x4<-x2[i,][nrow(x3)>0];merge(x3[,1:2],x4) }))})) dataNew2$Date.Accident<-format(dataNew2$Date.Accident,"%d-%b-%y") library(plyr) res<-join(rbind(dataNew1,dataNew2),DataB,by=c("ID","Date.Accident"),type="right") res # ID Date.Accident Status #1 1 06-Sep-01 A #2 1 20-Jan-05 <NA> #3 2 06-Mar-02 B #4 8 25-Jun-01 A #5 8 29-Oct-01 B #6 8 18-Mar-02 B #7 8 06-Jun-03 B #8 8 16-Jan-04 <NA> dat2<- data.frame(ID=unique(DataA$ID)) res2<-join(dat2,res,by="ID",type="full") res2$Status[is.na(res2$Date.Accident) & is.na(res2$Status)]<-0 res2 # ID Date.Accident Status #1 1 06-Sep-01 A #2 1 20-Jan-05 <NA> #3 2 06-Mar-02 B #4 3 <NA> 0 #5 8 25-Jun-01 A #6 8 29-Oct-01 B #7 8 18-Mar-02 B #8 8 06-Jun-03 B #9 8 16-Jan-04 <NA> This is similar to the output you wanted in the attached excel sheet except that the status for "29-Oct-01" should be B as per the rules. A.K. ID Date.Accident Result 06-Sep-01 A 1 20-Jan-05 NA 2 06-Mar-02 B 25-Jun-01 A 29-Oct-01 A 8 18-Mar-02 B 06-Jun-03 B 16-Jan-04 NA ________________________________ Sent: Saturday, April 13, 2013 3:39 PM Subject: Re: Comparison of Date format Hi Arun, Thanks you so much for your help and effort. I have attached an excel file that I put the conditions in it. It might help for coding. I basically want to check if the dateAccident is between date1 and date2--->return(status in that row) If dateAccident =date1 and date2 ---> return (Status for Previous row) If dateAccident=date1 and itsNOT equal to date2 --->return the status for date1 If ID in DateA not found in ID in DateB---> (I'm not sure what I can return here) I can return only ID with status 0(?) Thanks again. Best,Farnoosh Sheikhi ________________________________ Cc: R help <r-help@r-project.org> Sent: Friday, April 12, 2013 10:19 PM Subject: Re: Comparison of Date format HI, In cases like below: DataA<- read.table(text=" ID,Status,Date1,Date2 1,A,3-Feb-01,15-May-01 1,B,15-May-01,16-May-01 1,A,16-May-01,3-Sep-01 1,B,3-Sep-01,13-Sep-01 1,C,13-Sep-01,26-Feb-04 2,A,9-Feb-01,24-May-01 2,B,24-May-01,25-May-01 2,A,25-May-01,16-Mar-02 2,A,6-Mar-02,18-Mar-02 2,A,14-Sep-01,6-Mar-02 ",sep=",",header=TRUE,stringsAsFactors=FALSE) library(stringr) DataA[,3]<- str_trim(DataA[,3]) DataA[,4]<- str_trim(DataA[,4]) DataB<- read.table(text=" ID Date.Accident 1 3-Sep-01 1 20-Jan-05 1 26-Feb-04 2 25-May-01 2 6-Mar-02 ",sep="",header=TRUE,stringsAsFactors=FALSE) DataB[,2]<- str_trim(DataB[,2]) #Needed modification in the 2nd method: dataNew2<-do.call(rbind,lapply(unique(DataA$ID),function(i){x1<- DataA[DataA$ID==i,]; x2<- DataB[DataB$ID==i,]; do.call(rbind,lapply(seq_len(nrow(x2)),function(i) {x3<- unlist(mapply(function(x,y) which(x==y), x1[,3:4],x2[i,2])); x4<- if(length(x3)==2) x1[x3[which.min(x3)],!names(x1)%in%names(x3[which.max(x3)])] else if(length(x3)==1) x1[x3,c("ID","Status",names(x3))] else NULL; if(length(x4)>0) {colnames(x4)[3]<- colnames(DataB)[2]; x4} else NULL})) })) library(plyr) res2<- join(dataNew2,DataB,by=c("Date.Accident","ID"),type="right") res2 # Date.Accident ID Status #1 3-Sep-01 1 A #2 20-Jan-05 1 <NA> #3 26-Feb-04 1 C #4 25-May-01 2 B #5 6-Mar-02 2 A Using the first method: lst1<-lapply(seq_len(nrow(DataB)),function(i) {x1<-unlist(mapply(function(x,y) which(x==y),DataA[,3:4],DataB[i,2]));x2<-if(length(x1)==2) DataA[x1[which.min(x1)],!names(DataA)%in%names(x1[which.max(x1)])] else if(length(x1)==1) DataA[x1,c("ID","Status",names(x1))] else NULL}) lst2<-lapply(lst1,data.frame) lst2<-lst2[lapply(lst2,nrow)!=0] library(plyr) dataNew<-do.call(rbind,lapply(lst2,function(x) {colnames(x)[3]<- colnames(DataB)[2];x})) res<-join(dataNew,DataB,by=c("Date.Accident","ID"),type="right") identical(res,res2) #[1] TRUE A.K. ----- Original Message ----- Cc: R help <r-help@r-project.org> Sent: Saturday, April 13, 2013 12:41 AM Subject: Re: Comparison of Date format Hi, In the example you provided, it looks like the dates in Date2 happens first. So, I changed it a bit. DataA<- read.table(text=" ID,Status,Date1,Date2 1,A,3-Feb-01,15-May-01 1,B,15-May-01,16-May-01 1,A,16-May-01,3-Sep-01 1,B,3-Sep-01,13-Sep-01 1,C,13-Sep-01,26-Feb-04 2,A,9-Feb-01,24-May-01 2,B,24-May-01,25-May-01 2,A,25-May-01,16-Mar-02 2,A,6-Mar-02,18-Mar-02 2,A,14-Sep-01,6-Mar-02 ",sep=",",header=TRUE,stringsAsFactors=FALSE) library(stringr) DataA[,3]<- str_trim(DataA[,3]) DataA[,4]<- str_trim(DataA[,4]) DataB<- read.table(text=" ID Date.Accident 1 3-Sep-01 1 20-Jan-05 1 26-Feb-04 2 6-Mar-02 ",sep="",header=TRUE,stringsAsFactors=FALSE) lst1<-lapply(seq_len(nrow(DataB)),function(i) {x1<-unlist(mapply(function(x,y) which(x==y),DataA[,3:4],DataB[i,2]));x2<-if(length(x1)==2) DataA[x1[which.min(x1)],!names(DataA)%in%names(x1[which.max(x1)])] else if(length(x1)==1) DataA[x1,c("ID","Status",names(x1))] else NULL}) lst2<-lapply(lst1,data.frame) lst2<-lst2[lapply(lst2,nrow)!=0] lst2 #[[1]] # ID Status Date2 #3 1 A 3-Sep-01 #[[2]] # ID Status Date2 #5 1 C 26-Feb-04 #[[3]] # ID Status Date1 #9 2 A 6-Mar-02 library(plyr) dataNew<-do.call(rbind,lapply(lst2,function(x) {colnames(x)[3]<- colnames(DataB)[2];x})) res<-join(dataNew,DataB,by=c("Date.Accident","ID"),type="right") res # Date.Accident ID Status #1 3-Sep-01 1 A #2 20-Jan-05 1 <NA> #3 26-Feb-04 1 C #4 6-Mar-02 2 A #or you can split by ID lst1New<-lapply(unique(DataA$ID),function(i){x1<- DataA[DataA$ID==i,]; x2<- DataB[DataB$ID==i,]; do.call(rbind,lapply(seq_len(nrow(x2)),function(i) {x3<- unlist(mapply(function(x,y) which(x==y), x1[,3:4],x2[i,2])); x4<- if(length(x3)==2) x1[x3[which.min(x3)],!names(x1)%in%names(x3[which.max(x3)])] else if(length(x3)==1) x1[x3,c("ID","Status",names(x3))] else NULL})) }) lst1New #[[1]] # ID Status Date2 #3 1 A 3-Sep-01 #5 1 C 26-Feb-04 #[[2]] # ID Status Date1 #9 2 A 6-Mar-02 dataNew1<- do.call(rbind,lapply(lst1New,function(x) {colnames(x)[3]<- colnames(DataB)[2];x})) res1<- join(dataNew1,DataB,by=c("Date.Accident","ID"),type="right") res1 # Date.Accident ID Status #1 3-Sep-01 1 A #2 20-Jan-05 1 <NA> #3 26-Feb-04 1 C #4 6-Mar-02 2 A A.K. ________________________________ Sent: Friday, April 12, 2013 5:40 PM Subject: Comparison of Date format Hi there, Hope all is well. I have a complicated data and I need to create a new variable based on the date and description of the data. I really appreciate if you can help me. Here is how data look like: DataA DataB ID Status Date1 Date2 ID Date.Accident Result 1 A 3-Feb-01 15-May-01 1 3-Sep-01 A 1 B 15-May-01 16-May-01 1 20-Jan-05 NA 1 A 16-May-01 3-Sep-01 1 B 3-Sep-01 13-Sep-01 1 C 13-Sep-01 26-Feb-04 2 A 9-Feb-01 24-May-01 2 6-Mar-02 A 2 B 24-May-01 25-May-01 2 A 25-May-01 6-Mar-02 2 A 6-Mar-02 18-Mar-02 I want to compare dataA to B for each ID. if Date 1 or Date 2 matches to Date.Accident return the result as status in dataA as a new result in Data B. The trick here is I have two dates that are matched, but I want the status of the one that happen first. The sample size of each data is not the same. I really appreciate your time and help. Thanks. [[alternative HTML version deleted]]
______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.