Hi Denes, thank you very much for your mesasge!! I would like to learn from you.
We generated preliminary data for a new project, so performing explorative analysis. since only sample size is relative small, I probably will both Cox regression and firth's Cox regresion. there are 2972 markers; for each marker, there are different ways to convert multi-allele genotype data to bi-allele genotype data based on allele distribution pattern in each marker. for each converted bi-allele variable, I ran Cox model in two different ways ( categorical variable, and numeric variable) since distribution patterns are different for different markers, I tested them and added some restrictions using if else condition. I have wide computer screen, one line of code is long. how do you arrange the data analysis process if you perform those analysis? Thank you, Ding #dat file includes multi-allele genotype data for 2972 markers in row and 187 individuals in columns for (i in 1:nrow(dat)){ tem <- as.data.frame(t(dat[i,,drop=F])) #subset data for one of 2972 markers names(tem)<-"V1" tem <- tem[which(tem$V1!=""),,drop=F] tem1 <-separate(tem, col=V1, into=c("m1","m2"), convert = T) row.names(tem1) <-substr(row.names(tem1), 17, nchar(row.names(tem1))-7) tem2 <-tem1 tem3 <-gather(tem2, marker, VNTR_repeats, m1:m2) tem4 <-as.data.frame(t(t(table(tem3$VNTR_repeats))))[,c(1,3)] tem4$Var1 <-as.numeric(as.character(tem4$Var1)) tem4 <-tem4[order(tem4$Var1),] tem5 <-tem4[c(TRUE,diff(tem4$Freq)!=0), ] if(length(tem5$Freq)<=2) {tem6 <-tem4} else {tem6 <-tem5[which.maxs (tem5$Freq, include.endpoints=TRUE),]} if((tem6$Var1[1]==tem5$Var1[1]) & length(tem6$Var1)>1) tem6 <-tem6[-1,] c <-length(tem6$Var1) # for each marker, c different ways to convert multi-allele data into bi-allele data #to convert multi-allele data into bi-allele data for one marker for ( m in 1:c) { if(tem6$Var1[m]==tem5$Var1[1]) { tem7 <-tem2%>% mutate(m3 = case_when(m1>tem6$Var1[m]~"L", m1<=tem6$Var1[m] ~"S"), m4 = case_when(m2>tem6$Var1[m]~"L", m2<=tem6$Var1[m] ~"S"))} else {tem7 <-tem2%>% mutate(m3 = case_when(m1>=tem6$Var1[m]~"L", m1<tem6$Var1[m] ~"S"), m4 = case_when(m2>=tem6$Var1[m]~"L", m2<tem6$Var1[m] ~"S"))} cut_off <-paste(strsplit(as.character(row.names(dat)[i]),'_',fixed=TRUE)[[1]][1], "cutoff_repeat", tem6$Var1[m], sep="_") tem7[[cut_off]] <-paste(tem7$m3, tem7$m4, sep="_") tem7[[cut_off]] <-ifelse(tem7[[cut_off]] =="L_S", "S_L", tem7[[cut_off]]) tem2 <-tem7 } #to merge phenotypic data and bi-allele data for Cox regression analysis # treat bi-allele data as categorical data (factor) tem8 <-tem7[,-c(1:4),drop=F] row.names(tem8) <-row.names(tem1) tem9 <-merge(pheno, tem8, by.x="id", by.y=0) tem10 <- matrix(NA, nrow=c, ncol=11) #Cox models for one marker ( c different categorical variables) for (n in 1:c) { form=as.formula(paste("Surv(age, status) ~ ", factor(colnames(tem9)[n + 8]))) res <- coxph(formula=form, data=tem9) s <- summary(res) if (length(table(tem9[[colnames(tem9)[n + 8]]])) ==3) tem10[n,] <- c(row.names(s[[8]])[1], s[[4]], s[[7]][1,5], s[[8]][1,1], s[[8]][1,3], s[[8]][1,4], row.names(s[[8]])[2], s[[7]][2,5], s[[8]][2,1], s[[8]][2,3], s[[8]][2,4]) if (length(table(tem9[[colnames(tem9)[n + 8]]])) ==2) tem10[n,] <- c(row.names(s[[8]])[1], s[[4]], s[[7]][1,5], s[[8]][1,1], s[[8]][1,3], s[[8]][1,4], NA, NA, NA, NA, NA) } #tem10 contains Cox regression results tem10 <-as.data.frame(tem10) names(tem10)[1:11]<-c("genotype1", "N_sample","Pr1","HR1","HR_lower1" ,"HR_upper1", "genotype2", "Pr2","HR2","HR_lower2" ,"HR_upper2") # for the same marker, convert each categorical variable as numeric variables (0, 1, 2 copies of long alleles) if (ncol(tem8)==1) tem8$dup <-tem8[,1] tem11 <- as.data.frame(do.call(cbind, lapply(tem8[,1:ncol(tem8)], function(i) mgsub(pattern = c("S_S", "S_L","L_L"), replacement = c("0", "1","2"), i)))) tem12 <- as.data.frame(do.call(cbind, lapply(tem11[,1:ncol(tem11)], function(i) as.numeric(as.character(i))))) row.names(tem12)<-row.names(tem8) colnames(tem12) <-paste(colnames(tem12),"numeric",sep="_") tem13 <-merge(pheno, tem12, by.x="id", by.y=0) tem14 <- matrix(NA, nrow=c, ncol=6) #Cox models for numberic variables for (p in 1:c) { form=as.formula(paste("Surv(age, status) ~ ", colnames(tem13)[p + 8])) res14 <- coxph(formula=form, data=tem13) s14 <- summary(res14) # Cox model results in tem14 tem14[p,] <- c(row.names(s14[[8]])[1], s14[[4]], s14[[7]][1,5], s14[[8]][1,1], s14[[8]][1,3], s14[[8]][1,4]) } tem14 <-as.data.frame(tem14) names(tem14)[1:6]<-c("genotype", "N_sample","Pr","HR","HR_lower" ,"HR_upper") #put Cox model input data files (tem8 and tem12) and Cox model results (tem10 and tem14) into a list object #and assign marker id as file name for the list object assign(row.names(dat)[i], list(tem8,tem10, tem12, tem14)) } list2972 <-ls(pat="VNTR.*.") # to combine cox model input files and results dat.cat <- do.call(rbind, lapply(list2972, function(x)get(x)[1])) res.cat <- do.call(rbind, lapply(list2972, function(x)get(x)[[2]])) dat.num <- do.call(rbind, lapply(list2972, function(x)get(x)[3])) res.num <- do.call(rbind, lapply(list2972, function(x)get(x)[4])) ________________________________________ From: Dénes Tóth [toth.de...@kogentum.hu] Sent: Wednesday, March 18, 2020 2:35 PM To: Bert Gunter; Yuan Chun Ding Cc: r-help mailing list Subject: Re: [R] row combining 2972 files On 3/18/20 9:02 PM, Bert Gunter wrote: > Untested in the absence of example data, but I think > > combined <- do.call(rbind, lapply(ls2972, function(x)get(x)[[2]])) Or if you have largish data, use rbindlist() from the data.table package: combined <- data.table::rbindlist( lapply(ls2972, function(x) get(x)[[2]]) ) However, it seems you are on the wrong track when you create 2972 lists in your workspace. (Note: there are no "list files" objects in R. Lists are objects, not files.) You should have one list of 2972 lists each having 4 data.frames. E.g.: x <- list( list( data.frame(), data.frame(x = 1), data.frame(), data.frame() ), list( data.frame(), data.frame(x = 2), data.frame(), data.frame() ), list( data.frame(), data.frame(x = 3), data.frame(), data.frame() ) ) keep <- lapply(x, "[[", 2L) combined <- data.table::rbindlist(keep) HTH, Denes > > should do it. > > > Bert Gunter > > "The trouble with having an open mind is that people keep coming along and > sticking things into it." > -- Opus (aka Berkeley Breathed in his "Bloom County" comic strip ) > > > On Wed, Mar 18, 2020 at 12:16 PM Yuan Chun Ding <ycd...@coh.org> wrote: > >> Hi R users, >> >> I generated 2972 list files in R, each list includes four data frame files >> , file names for those list file are VNTR13576, VNTR14689, etc. the second >> data frame in each list has the same 11 column names, but different number >> of rows. >> >> I can combine two dataframes by >> list2972 <-ls(pat="VNTR.*.") >> test <-rbind(get(list2972[16])[[2]],get(list2972[166])[[2]] ) >> >> I tried to combine all 2972 data frames from those 2972 list files using >> do.call or lapply function, but not successful. >> >> Can you help me? >> >> Thank you very much! >> >> Ding >> >> ---------------------------------------------------------------------- >> ------------------------------------------------------------ >> -SECURITY/CONFIDENTIALITY WARNING- >> >> This message and any attachments are intended solely for the individual or >> entity to which they are addressed. This communication may contain >> information that is privileged, confidential, or exempt from disclosure >> under applicable law (e.g., personal health information, research data, >> financial information). Because this e-mail has been sent without >> encryption, individuals other than the intended recipient may be able to >> view the information, forward it to others or tamper with the information >> without the knowledge or consent of the sender. If you are not the intended >> recipient, or the employee or person responsible for delivering the message >> to the intended recipient, any dissemination, distribution or copying of >> the communication is strictly prohibited. If you received the communication >> in error, please notify the sender immediately by replying to this message >> and deleting the message and any accompanying files from your system. If, >> due to the security risks, you do not wish to receive further >> communications via e-mail, please reply to this message and inform the >> sender that you do not wish to receive further e-mail from the sender. >> (LCP301) >> >> ______________________________________________ >> R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see >> https://urldefense.com/v3/__https://stat.ethz.ch/mailman/listinfo/r-help__;!!Fou38LsQmgU!81csfRTX5GGrC7eTO0C1mv_eFQ7glT3yULsK-xiJfcvfjvY8d49Iiff6h_xX$ >> PLEASE do read the posting guide >> https://urldefense.com/v3/__http://www.R-project.org/posting-guide.html__;!!Fou38LsQmgU!81csfRTX5GGrC7eTO0C1mv_eFQ7glT3yULsK-xiJfcvfjvY8d49IiQj8zA2P$ >> and provide commented, minimal, self-contained, reproducible code. >> > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://urldefense.com/v3/__https://stat.ethz.ch/mailman/listinfo/r-help__;!!Fou38LsQmgU!81csfRTX5GGrC7eTO0C1mv_eFQ7glT3yULsK-xiJfcvfjvY8d49Iiff6h_xX$ > PLEASE do read the posting guide > https://urldefense.com/v3/__http://www.R-project.org/posting-guide.html__;!!Fou38LsQmgU!81csfRTX5GGrC7eTO0C1mv_eFQ7glT3yULsK-xiJfcvfjvY8d49IiQj8zA2P$ > and provide commented, minimal, self-contained, reproducible code. > ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.