This should do it:

> x
   id COD MDid
1   1 A01    1
2   2 A02    2
3   3 A03    3
4   4 A04    4
5   5 A05    5
6   1 B01    6
7   2 A02    3
8   3 B03    5
9   4 B04    7
10  5 A05    2
> # max number of columns
> maxCol <- max(table(x$id))
> # now create the output
> y <- lapply(split(x, x$id), function(.id){
+     .cod <- as.character(.id$COD)
+     .mdid <- .id$MDid
+     length(.cod) <- maxCol
+     length(.mdid) <- maxCol
+     c(.id$id[1], .cod, .mdid)
+ })
> # create the dataframe
> y <- as.data.frame(do.call(rbind, y))
> # add column names
> names(y) <- c('id', paste("COD", seq(maxCol), sep='.'),
+     paste("MDid", seq(maxCol), sep='.'))
> y
  id COD.1 COD.2 MDid.1 MDid.2
1  1   A01   B01      1      6
2  2   A02   A02      2      3
3  3   A03   B03      3      5
4  4   A04   B04      4      7
5  5   A05   A05      5      2
>
>
On Tue, Jun 23, 2009 at 5:52 PM, Alan Cohen <coh...@smh.toronto.on.ca> wrote:

> Hi all,
>
> I am trying to convert a data set of physician death codings (each
> individual's cause of death is coded by multiple physicians) from long to
> wide format, but the "reshape" function doesn't seem to work because it
> requires a "time" variable to identify the sequence among the repeated
> observations within individuals. My data set has no order, and different
> numbers of physicians code each death, up to 23. It is also quite large, so
> for-loops are very slow, and I'll need to repeat the procedure multiple
> times. So I'm looking for a processor-efficient way to replicate "reshape"
> without a time variable.
>
> Thanks in advance for any help you can provide. A worked example and some
> code I've tried are below. I'm working with R v2.8.1 on Windows XP
> Professional.
>
> Cheers,
> Alan Cohen
>
> Here's what my data look like now:
>
> > id <- rep(1:5,2)
> > COD <- c("A01","A02","A03","A04","A05","B01","A02","B03","B04","A05")
> > MDid <- c(1:6,3,5,7,2)
> > data <- as.data.frame(cbind(id,COD,MDid))
> > data
>    id COD MDid
> 1   1 A01    1
> 2   2 A02    2
> 3   3 A03    3
> 4   4 A04    4
> 5   5 A05    5
> 6   1 B01    6
> 7   2 A02    3
> 8   3 B03    5
> 9   4 B04    7
> 10  5 A05    2
>
> And here's what I'd like them to look like:
>
> > id2 <- 1:5
> > COD.1 <- c("A01","A02","A03","A04","A05")
> > COD.2 <- c("B01","A02","B03","B04","A05")
> > MDid.1 <- 1:5
> > MDid.2 <-c(6,3,5,7,2)
> > data.wide <- as.data.frame(cbind(id2,COD.1,COD.2,MDid.1,MDid.2))
> > data.wide
>   id2 COD.1 COD.2 MDid.1 MDid.2
> 1   1   A01   B01      1      6
> 2   2   A02   A02      2      3
> 3   3   A03   B03      3      5
> 4   4   A04   B04      4      7
> 5   5   A05   A05      5      2
>
> Here's the for-loop that's very slow (with or without the if-clauses
> activated):
>
> ids<-unique(data$id)
> ct<-length(ids)
> codes<-matrix(0,ct,11)
>
> colnames(codes)<-c("ID","ICD1","Coder1","ICD2","Coder2","ICD3","Coder3","ICD4","Coder4","ICD5","Coder5")
> j<-0
> for (i in 1:ct){
> kkk <- ids[i]
> rpt<-data[data$id==kkk,]
> j<-max(j,nrow(rpt))
> codes[i,1]<-kkk
> codes[i,2]<-rpt$ICDCode[1]
> codes[i,3]<-rpt$T_Physician_ID[1]
> #if (nrow(rpt)>=2){
> codes[i,4]<-rpt$ICDCode[2]
> codes[i,5]<-rpt$T_Physician_ID[2]
> #if (nrow(rpt)>=3) {
> codes[i,6]<-rpt$ICDCode[3]
> codes[i,7]<-rpt$T_Physician_ID[3]
> #if (nrow(rpt)>=4) {
> codes[i,8]<-rpt$ICDCode[4]
> codes[i,9]<-rpt$T_Physician_ID[4]
> #if (nrow(rpt)>=5) {
> codes[i,10]<-rpt$ICDCode[5]
> codes[i,11]<-rpt$T_Physician_ID[5]
> #}}}}
> }
>
> ______________________________________________
> R-help@r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
> http://www.R-project.org/posting-guide.html<http://www.r-project.org/posting-guide.html>
> and provide commented, minimal, self-contained, reproducible code.
>

--
Jim Holtman
Cincinnati, OH
+1 513 646 9390

What is the problem that you are trying to solve?

[[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.