Hi, Try this:
# Charge-state (z) frequencies per sample: once for all peptides and once for
# peptides with FDR < 0.01.
#
# Expects the working directory to contain one sub-folder per sample
# (a1, c1, c2, t1, ...), each holding a tab-separated
# "MSMS_<number>PepInfo.txt" file that has at least the columns `z` and `FDR`.
# Leaves freq.i1 / freq.rel.i1 (all data), freq.f1 / freq.rel.f1 (FDR < 0.01)
# and `colour` in the workspace for the barplot() calls that follow.
library(reshape2)  # melt() / dcast()

files <- paste0("MSMS_", 23, "PepInfo.txt")

# Read one file; returns a one-element list named after the file's folder.
read.data <- function(x) {
  names(x) <- gsub("^(.*)\\/.*", "\\1", x)
  lapply(x, function(y) {
    read.table(y, header = TRUE, sep = "\t", stringsAsFactors = FALSE,
               fill = TRUE)
  })
}

# List the directory tree once; fixed = TRUE matches the file name literally
# (as a regular expression the "." would match any character).
all.files <- list.files(recursive = TRUE)
pep.files <- all.files[grep(files, all.files, fixed = TRUE)]
if (length(pep.files) == 0) {
  stop("no '", files, "' file found below ", getwd(), call. = FALSE)
}

lista <- do.call("c", lapply(pep.files, read.data))
# Folder "c2" -> group "group_c": the digits are the sample number in a group.
names(lista) <- paste0("group_", gsub("\\d+", "", names(lista)))

res2 <- split(lista, names(lista))
# Inside each group, name the samples <group letter><running number>.
res3 <- lapply(res2, function(x) {
  names(x) <- paste0(gsub(".*_", "", names(x)), seq_along(x))
  x
})

# Charge states to tabulate: always 1:3 (as before), plus any other charge
# that occurs in the data, so that higher charge states are counted instead
# of being silently dropped by factor().
z.levels <- sort(union(1:3, unlist(lapply(lista, function(d) d$z),
                                   use.names = FALSE)))

# For every group, stack one frequency table (Var1 = charge, Freq = count)
# per sample; `pick.z` extracts the charges to count from a sample.
count.charges <- function(groups, pick.z) {
  lapply(groups, function(samples) {
    do.call(rbind, lapply(samples, function(d) {
      as.data.frame(table(factor(pick.z(d), levels = z.levels)))
    }))
  })
}

# Reshape the stacked tables to one row per sample and one column per charge.
# Row names of the stacked tables look like "c1.2" (<sample>.<row>); the part
# before the dot is the sample id.
to.wide <- function(counts) {
  do.call(rbind, lapply(counts, function(x) {
    long <- melt(data.frame(id = gsub("\\..*", "", row.names(x)), x),
                 id.vars = c("id", "Var1"))
    dcast(long, id ~ Var1, value.var = "value")
  }))
}

# Freq whole data
res4 <- count.charges(res3, function(d) d$z)
freq.i1 <- to.wide(res4)
freq.i1
#          id 1  2 3
#group_a   a1 1 12 6
#group_c.1 c1 0 10 3
#group_c.2 c2 0 12 3
#group_c.3 c3 0 13 4
#group_t.1 t1 0 10 4
#group_t.2 t2 1 12 6
freq.rel.i1 <- as.matrix(freq.i1[, -1] / rowSums(freq.i1[, -1]))
freq.rel.i1
#                   1         2         3
#group_a   0.05263158 0.6315789 0.3157895
#group_c.1 0.00000000 0.7692308 0.2307692
#group_c.2 0.00000000 0.8000000 0.2000000
#group_c.3 0.00000000 0.7647059 0.2352941
#group_t.1 0.00000000 0.7142857 0.2857143
#group_t.2 0.05263158 0.6315789 0.3157895

# Freq with FDR < 0.01
res5 <- count.charges(res3, function(d) d$z[d[["FDR"]] < 0.01])
freq.f1 <- to.wide(res5)
freq.f1
#          id 1  2 3
#group_a   a1 1 10 5
#group_c.1 c1 0  7 2
#group_c.2 c2 0  8 2
#group_c.3 c3 0  6 4
#group_t.1 t1 0  7 4
#group_t.2 t2 1 10 5
freq.rel.f1 <- as.matrix(freq.f1[, -1] / rowSums(freq.f1[, -1]))

# One (randomly ordered) colour per sample, two plots side by side.
colour <- sample(rainbow(nrow(freq.rel.i1)))
par(mfrow = c(1, 2))
barplot(freq.rel.i1,beside=T,main=("Sample"),xlab="Charge",ylab="Relative Frequencies",col=colour,legend.text = rownames(freq.rel.i1)) barplot(freq.rel.f1,beside=T,main=("Sample with FDR<0.01"),xlab="Charge",ylab="Relative Frequencies",col=colour,legend.text = rownames(freq.rel.f1)) #change the legend position Also, didn't check the rest of the code from chisquare test. A.K. ________________________________ From: Vera Costa <veracosta...@gmail.com> To: arun <smartpink...@yahoo.com> Sent: Tuesday, February 19, 2013 4:19 PM Subject: Re: reading data Here is the code and some outputs. z.plot <- function(directory,number) { #reading data setwd(directory) direct<-dir(directory,pattern = paste("MSMS_",number,"PepInfo.txt",sep=""), full.names = FALSE, recursive = TRUE) directT <- direct[grepl("^t", direct)] directC <- direct[grepl("^c", direct)] lista<-lapply(direct, function(x) read.table(x,header=TRUE, sep = "\t")) listaC<-lapply(directC, function(x) read.table(x,header=TRUE, sep = "\t")) listaT<-lapply(directT, function(x) read.table(x,header=TRUE, sep = "\t")) #count different z values cab <- vector() for (i in 1:length(lista)) { dc<-lista[[i]][ifelse(lista[[i]]$FDR<0.01, TRUE, FALSE),] dc<-table(dc$z) cab <- c(cab, names(dc)) } #Relative freqs to construct the graph cab <- unique(cab) print(cab) ###[1] "2" "3" "1" d <- matrix(ncol=length(cab)) dci<- d[-1,] dcf <- d[-1,] dti <- d[-1,] dtf <- d[-1,] for (i in 1:length(listaC)) { #Relative freq of all data dcc<-listaC[[i]] dcc<-table(factor(dcc$z, levels=cab)) dci<- rbind(dci, dcc) rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "c") #Relative freq of data with FDR<0.01 dcc1<-listaC[[i]][ifelse(listaC[[i]]$FDR<0.01, TRUE, FALSE),] dcc1<-table(factor(dcc1$z, levels=cab)) dcf<- rbind(dcf,dcc1) rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "c") } for (i in 1:length(listaT)) { #Relative freq of all data dct<-listaT[[i]] dct<-table(factor(dct$z, levels=cab)) dti<- rbind(dti, dct) 
rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "t") #Relative freq of data with FDR<0.01 dct1<-listaT[[i]][ifelse(listaT[[i]]$FDR<0.01, TRUE, FALSE),] dct1<-table(factor(dct1$z, levels=cab)) dtf<- rbind(dtf,dct1) rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "t") } freq.i<-rbind(dci,dti) freq.f<-rbind(dcf,dtf) freq.rel.i<-freq.i/apply(freq.i,1,sum) freq.rel.f<-freq.f/apply(freq.f,1,sum) print(freq.i) ## 2 3 1 #c1 10 3 0 #c2 12 3 0 #c3 13 4 0 #t1 10 4 0 #t2 12 6 1 print(freq.f) ### 2 3 1 #c1 7 2 0 #c2 8 2 0 #c3 6 4 0 #t1 7 4 0 #t2 10 5 1 print(freq.rel.i) ### 2 3 1 #c1 0.7692308 0.2307692 0.00000000 #c2 0.8000000 0.2000000 0.00000000 #c3 0.7647059 0.2352941 0.00000000 #t1 0.7142857 0.2857143 0.00000000 #t2 0.6315789 0.3157895 0.05263158 print(freq.rel.f) ### 2 3 1 #c1 0.7777778 0.2222222 0.0000 #c2 0.8000000 0.2000000 0.0000 #c3 0.6000000 0.4000000 0.0000 #t1 0.6363636 0.3636364 0.0000 #t2 0.6250000 0.3125000 0.0625 #Graph plot colour<-sample(rainbow(nrow(freq.rel.i))) par(mfrow=c(1,2)) barplot(freq.rel.i,beside=T,main=("Sample"),xlab="Charge",ylab="Relative Frequencies",col=colour,legend.text = rownames(freq.rel.i)) barplot(freq.rel.f,beside=T,main=("Sample with FDR<0.01"),xlab="Charge",ylab="Relative Frequencies",col=colour,legend.text = rownames(freq.rel.f)) #average of the group (except c1&t1) freqs<-rbind(dcf[-1,], dtf[-1,]) average<-apply(freqs,2,mean) print(average) ### 2 3 1 #8.0000000 3.6666667 0.3333333 #chisquare test function chisq.test<-function(x,y){ somax<-sum(x) somay<-sum(y) nj.<-x+y nj<-sum(nj.) 
ejx<-(nj./nj)*somax ejy<-(nj./nj)*somay ETx<-((x-ejx)^2)/ejx ETy<-((y-ejy)^2)/ejy ETobs<-sum(ETx)+sum(ETy) pvalue<-1-pchisq(c(ETobs),df=length(x|y)-1,lower.tail=TRUE) return(pvalue) } #pvalues of the chisquare test between sample and average (H0: two samples has the same distribution) pvalues<-c() for (i in 1:(nrow(freqs))){ a<-chisq.test(freqs[i,],average) pvalues<-c(pvalues,a) } #data frame with final p-values dataframe<-data.frame(c(rownames(freqs)), c(pvalues)) colnames(dataframe)<-c("sample name","pvalue") print(dataframe) ### sample name pvalue #1 c2 0.7235907 #2 c3 0.7963287 #3 0.9079200 } z.plot("C:/Users/Vera Costa/Desktop/dados",23) ###and two barplots.. Here, I remove the group a1. Thank you 2013/2/19 arun <smartpink...@yahoo.com> Hi, > >Could you send the results for the folder that was sent to me? It will be >easy for me. > >Arun > > > > > > >________________________________ >From: Vera Costa <veracosta...@gmail.com> >To: arun <smartpink...@yahoo.com> >Sent: Tuesday, February 19, 2013 3:47 PM > >Subject: Re: reading data > > >Oh sorry, I change the folder. > >I send for your folder > > > >2013/2/19 arun <smartpink...@yahoo.com> > >Hello, >> >> >> Regarding the results, is it from the same folder that you sent to me?? >>I am getting different results by running your steps. 
>> >> >>direct<- list.files(recursive=TRUE) >> direct >>#[1] "a1/MSMS_23PepInfo.txt" "c1/MSMS_23PepInfo.txt" "c2/MSMS_23PepInfo.txt" >>#[4] "c3/MSMS_23PepInfo.txt" "t1/MSMS_23PepInfo.txt" "t2/MSMS_23PepInfo.txt" >> >> directT<- list.files(recursive=TRUE)[grepl("^t",dir())] >> >>directT >>#[1] "t1/MSMS_23PepInfo.txt" "t2/MSMS_23PepInfo.txt" >> >> >>directC<- list.files(recursive=TRUE)[grepl("^c",dir())] >> >>directC >>#[1] "c1/MSMS_23PepInfo.txt" "c2/MSMS_23PepInfo.txt" "c3/MSMS_23PepInfo.txt" >> >> >> >>lista<- lapply(direct,function(x) >>read.table(x,header=TRUE,stringsAsFactors=FALSE,sep="\t",fill=TRUE)) >> >>listaT<-lapply(directT, function(x) read.table(x,header=TRUE, sep = >>"\t",fill=TRUE)) >>listaC<-lapply(directC, function(x) read.table(x,header=TRUE, sep = >>"\t",fill=TRUE)) >> >> #count different z values >> cab <- vector() >> for (i in 1:length(lista)) { >> dc<-lista[[i]][ifelse(lista[[i]]$FDR<0.01, TRUE, FALSE),] >> dc<-table(dc$z) >> cab <- c(cab, names(dc)) >> } >> >> #Relative freqs to construct the graph >> cab <- unique(cab) >> print(cab) >> >>#[1] "1" "2" "3" #Here results are not correct >> >> >>d <- matrix(ncol=length(cab)) >> dci<- d[-1,] >> dcf <- d[-1,] >> dti <- d[-1,] >> dtf <- d[-1,] >> >> for (i in 1:length(listaC)) { >> >> #Relative freq of all data >> dcc<-listaC[[i]] >> dcc<-table(factor(dcc$z, levels=cab)) >> dci<- rbind(dci, dcc) >> rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "c") >> >> >> #Relative freq of data with FDR<0.01 >> dcc1<-listaC[[i]][ifelse(listaC[[i]]$FDR<0.01, TRUE, FALSE),] >> dcc1<-table(factor(dcc1$z, levels=cab)) >> dcf<- rbind(dcf,dcc1) >> rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "c") >> } >> print(dci) #here too. >> >># 1 2 3 >>#c1 0 10 3 >>#c2 0 12 3 >>#c3 0 13 4 >> >> >>It is important to clear this before I make any changes to the script. You >>need to send me the output of the same data folder to understand what is >>going on. 
>> >> >>Arun >>________________________________ >>From: Vera Costa <veracosta...@gmail.com> >>To: arun <smartpink...@yahoo.com> >>Sent: Tuesday, February 19, 2013 9:24 AM >> >>Subject: Re: reading data >> >> >>Ok. >> >>Here is the code and some outputs. >> >>z.plot <- function(directory,number) { >> #reading data >> setwd(directory) >> direct<-dir(directory,pattern = paste("MSMS_",number,"PepInfo.txt",sep=""), >>full.names = FALSE, recursive = TRUE) >> directT <- direct[grepl("^t", direct)] >> directC <- direct[grepl("^c", direct)] >> >> lista<-lapply(direct, function(x) read.table(x,header=TRUE, sep = "\t")) >> listaC<-lapply(directC, function(x) read.table(x,header=TRUE, sep = "\t")) >> listaT<-lapply(directT, function(x) read.table(x,header=TRUE, sep = "\t")) >> >> #count different z values >> cab <- vector() >> for (i in 1:length(lista)) { >> dc<-lista[[i]][ifelse(lista[[i]]$FDR<0.01, TRUE, FALSE),] >> dc<-table(dc$z) >> cab <- c(cab, names(dc)) >> } >> >> #Relative freqs to construct the graph >> cab <- unique(cab) >> print(cab) >> >>###[1] "1" "2" "3" "4" "5" >> >> >> >> d <- matrix(ncol=length(cab)) >> dci<- d[-1,] >> dcf <- d[-1,] >> dti <- d[-1,] >> dtf <- d[-1,] >> >> for (i in 1:length(listaC)) { >> >> #Relative freq of all data >> dcc<-listaC[[i]] >> dcc<-table(factor(dcc$z, levels=cab)) >> dci<- rbind(dci, dcc) >> rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "c") >> >> >> #Relative freq of data with FDR<0.01 >> dcc1<-listaC[[i]][ifelse(listaC[[i]]$FDR<0.01, TRUE, FALSE),] >> dcc1<-table(factor(dcc1$z, levels=cab)) >> dcf<- rbind(dcf,dcc1) >> rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "c") >> } >> print(dci) >> >>### 1 2 3 4 5 >>#c1 93 8356 3621 450 55 >>#c2 108 13513 6859 793 73 >>#c3 97 13526 6724 739 82 >>#c4 101 13417 6574 761 62 >> >> print(dcf) >> >>### 1 2 3 4 5 >>#c1 10 4576 2100 199 17 >>#c2 7 7831 4039 314 23 >>#c3 16 7887 4087 286 22 >>#c4 20 7824 4045 311 20 >> >> for (i in 1:length(listaT)) { >> >> 
#Relative freq of all data >> dct<-listaT[[i]] >> dct<-table(factor(dct$z, levels=cab)) >> dti<- rbind(dti, dct) >> rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "t") >> >> >> #Relative freq of data with FDR<0.01 >> dct1<-listaT[[i]][ifelse(listaT[[i]]$FDR<0.01, TRUE, FALSE),] >> dct1<-table(factor(dct1$z, levels=cab)) >> dtf<- rbind(dtf,dct1) >> rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "t") >> } >> >> print(dti) >> >>### 1 2 3 4 5 >>#t1 32 8640 4098 429 36 >>#t2 128 13209 6723 788 75 >>#t3 85 13043 6691 754 82 >>#t4 139 13750 7036 807 84 >> >> print(dtf) >> >> >>#### 1 2 3 4 5 >>#t1 5 4885 2571 196 8 >>#t2 12 7752 4209 360 28 >>#t3 19 7563 4086 336 18 >>#t4 14 8108 4218 312 26 >> >> >> freq.i<-rbind(dci,dti) >> freq.f<-rbind(dcf,dtf) >> freq.rel.i<-freq.i/apply(freq.i,1,sum) >> freq.rel.f<-freq.f/apply(freq.f,1,sum) >> print(freq.i) >>## 1 2 3 4 5 >>#c1 93 8356 3621 450 55 >>#c2 108 13513 6859 793 73 >>#c3 97 13526 6724 739 82 >>#c4 101 13417 6574 761 62 >>#t1 32 8640 4098 429 36 >>#t2 128 13209 6723 788 75 >>#t3 85 13043 6691 754 82 >>#t4 139 13750 7036 807 84 >> >> print(freq.f) >> ### 1 2 3 4 5 >>#c1 10 4576 2100 199 17 >>#c2 7 7831 4039 314 23 >>#c3 16 7887 4087 286 22 >>#c4 20 7824 4045 311 20 >>#t1 5 4885 2571 196 8 >>#t2 12 7752 4209 360 28 >>#t3 19 7563 4086 336 18 >>#t4 14 8108 4218 312 26 >> >> print(freq.rel.i) >>### 1 2 3 4 5 >>#c1 0.007395626 0.6644930 0.2879523 0.03578529 0.004373757 >>#c2 0.005059496 0.6330460 0.3213248 0.03714982 0.003419844 >>#c3 0.004582389 0.6389834 0.3176493 0.03491119 0.003873772 >>#c4 0.004829070 0.6415013 0.3143199 0.03638537 0.002964380 >>#t1 0.002417832 0.6528145 0.3096335 0.03241405 0.002720060 >>#t2 0.006117670 0.6313148 0.3213210 0.03766190 0.003584572 >>#t3 0.004115226 0.6314694 0.3239409 0.03650448 0.003969983 >>#t4 0.006371470 0.6302714 0.3225156 0.03699120 0.003850385 >> print(freq.rel.f) >> >>### 1 2 3 4 5 >>#c1 0.0014488554 0.6629962 0.3042596 0.02883222 0.002463054 >>#c2 
0.0005731128 0.6411495 0.3306861 0.02570820 0.001883085 >>#c3 0.0013010246 0.6413238 0.3323305 0.02325581 0.001788909 >>#c4 0.0016366612 0.6402619 0.3310147 0.02545008 0.001636661 >>#t1 0.0006523157 0.6373125 0.3354207 0.02557078 0.001043705 >>#t2 0.0009707952 0.6271337 0.3405064 0.02912386 0.002265189 >>#t3 0.0015804359 0.6290967 0.3398769 0.02794876 0.001497255 >>#t4 0.0011042751 0.6395330 0.3327023 0.02460956 0.002050797 >> >>#Graph plot >>colour<-sample(rainbow(nrow(freq.rel.i))) >>par(mfrow=c(1,2)) >>barplot(freq.rel.i,beside=T,main=("Sample"),xlab="Charge",ylab="Relative >>Frequencies",col=colour,legend.text = rownames(freq.rel.i)) >>barplot(freq.rel.f,beside=T,main=("Sample with >>FDR<0.01"),xlab="Charge",ylab="Relative Frequencies",col=colour,legend.text = >>rownames(freq.rel.f)) >> >>#average of the group (except c1&t1) >>freqs<-rbind(dcf[-1,], dtf[-1,]) >>average<-apply(freqs,2,mean) >>print(average) >> >>### 1 2 3 4 5 >> # 14.66667 7827.50000 4114.00000 319.83333 22.83333 >> >>#chisquare test function >>chisq.test<-function(x,y){ >> somax<-sum(x) >> somay<-sum(y) >> nj.<-x+y >> nj<-sum(nj.) 
>> ejx<-(nj./nj)*somax >> ejy<-(nj./nj)*somay >> ETx<-((x-ejx)^2)/ejx >> ETy<-((y-ejy)^2)/ejy >> ETobs<-sum(ETx)+sum(ETy) >> pvalue<-1-pchisq(c(ETobs),df=length(x|y)-1,lower.tail=TRUE) >> return(pvalue) >> } >> >>#pvalues of the chisquare test between sample and average (H0: two samples >>has the same distribution) >>pvalues<-c() >>for (i in 1:(nrow(freqs))){ >>a<-chisq.test(freqs[i,],average) >>pvalues<-c(pvalues,a) >>} >>print(pvalues) >>##[1] 0.5307206 0.6849480 0.8332661 0.3474956 0.5546527 0.9387602 >> >>#data frame with final p-values >>dataframe<-data.frame(c(rownames(freqs)), c(pvalues)) >>colnames(dataframe)<-c("sample name","pvalue") >>print(dataframe) >> >>### sample name pvalue >>#1 c2 0.5307206 >>#2 c3 0.6849480 >>#3 c4 0.8332661 >>#4 t2 0.3474956 >>#5 t3 0.5546527 >>#6 t4 0.9387602 >>} >>z.plot("C:/Users/Vera Costa/Desktop/dados",23) >> >>###and two barplots... >> >>Thank you >> >> >> >> >>2013/2/19 arun <smartpink...@yahoo.com> >> >>Got it. >>> >>>So, if I run your codes that you sent yesterday, will I get the correct >>>results for relative frequency etc. It would be also great if you can sent >>>me the output generated using your codes (on two groups as you showed >>>yesterday). It will help me in checking results much faster than running >>>your code and see if that is the result (because I have to do some >>>adjustment to your code for running in linux especially the ?dir()). >>> >>>I may be able to run it only later. >>> >>>Arun >>> >>> >>> >>> >>> >>> >>>________________________________ >>>From: Vera Costa <veracosta...@gmail.com> >>>To: arun <smartpink...@yahoo.com> >>>Sent: Tuesday, February 19, 2013 8:53 AM >>> >>>Subject: Re: reading data >>> >>> >>>I sent in second email. >>> >>>But I send again. >>> >>> >>> >>>2013/2/19 arun <smartpink...@yahoo.com> >>> >>> >>>> >>>>Your attachment didn't came through. 
>>>> >>>>Arun >>>> >>>> >>>> >>>> >>>>________________________________ >>>>From: Vera Costa <veracosta...@gmail.com> >>>>To: arun <smartpink...@yahoo.com> >>>>Sent: Tuesday, February 19, 2013 8:47 AM >>>> >>>>Subject: Re: reading data >>>> >>>> >>>>Sorry about a lot of questions. >>>> >>>>I attach a small part of my real data (I have a lot of row). >>>> >>>>My main objective is construct two graph. The first with the relative >>>>frequencies of each group (c1,c2,c3....). The second with the same >>>>frequencies but with FDR<0.01. >>>> >>>>After that I need to do the average in each group (but without the first >>>>group-c1,t1,a1....) and do the qui square test to see if the groups has the >>>>same distribution. You understand? >>>> >>>>At first, I had only two groups, and I did the code that I sent you. But I >>>>need a general code, not for two groups that I know the names, but for all >>>>groups (sometimes I can have 7 or 8 or 9 groups). >>>> >>>>it´s better now my explanation? :-) >>>>My English isn't also very good :-) >>>> >>>>Please not publish this data in forum... >>>> >>>>Thank you >>>> >>>> >>>> >>>> >>>>2013/2/18 arun <smartpink...@yahoo.com> >>>> >>>>Hi, >>>>> >>>>>I run the codes to understand what was going on. >>>>> >>>>>I didn't fully understand it as you constructed the codes for your >>>>>original dataset and not for the 'data` directory you sent to me. >>>>> >>>>>A.K. >>>>> >>>>> >>>>> >>>>> >>>>> >>>>>________________________________ >>>>>From: Vera Costa <veracosta...@gmail.com> >>>>>To: arun <smartpink...@yahoo.com> >>>>>Sent: Monday, February 18, 2013 4:02 PM >>>>> >>>>>Subject: Re: reading data >>>>> >>>>> >>>>>Thank you. >>>>>I don't need the same,but equivalent. I will try your suggestions. >>>>>Thank you. >>>>>No dia 18 de Fev de 2013 19:41, "arun" <smartpink...@yahoo.com> escreveu: >>>>> >>>>>Hi, >>>>>>I am not able to open your graph. I am using linux. 
>>>>>> >>>>>>Also, the codes in the function are not reproducible >>>>>> directT <- direct[grepl("^t", direct)] >>>>>> directC <- direct[grepl("^c", direct)] >>>>>> >>>>>>It takes double the time to know what is going on. >>>>>> >>>>>>dir() >>>>>>#[1] "a1" "a2" "a3" "b1" "b2" "c1" >>>>>> >>>>>>direct<- list.files(recursive=TRUE)[grepl("^a|^b",dir())] >>>>>> >>>>>> direct >>>>>>#[1] "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt" >>>>>>#[4] "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt" >>>>>>directA<- list.files(recursive=TRUE)[grepl("^a",dir())] >>>>>>directB<- list.files(recursive=TRUE)[grepl("^b",dir())] >>>>>>lista<- lapply(direct,function(x) >>>>>>read.table(x,header=TRUE,stringsAsFactors=FALSE,sep="\t",fill=TRUE)) >>>>>> >>>>>>listaA<-lapply(directA, function(x) read.table(x,header=TRUE, sep = >>>>>>"\t",fill=TRUE)) >>>>>>listaB<-lapply(directB, function(x) read.table(x,header=TRUE, sep = >>>>>>"\t",fill=TRUE)) >>>>>> >>>>>>#here I am changing the names listaT, z, etc.. >>>>>> >>>>>>count different mm values >>>>>> cab <- vector() >>>>>> for (i in 1:length(lista)) { >>>>>> dc<-lista[[i]][ifelse(lista[[i]]$b<0.01, TRUE, FALSE),] >>>>>> dc<-table(dc$mm) >>>>>> cab <- c(cab, names(dc)) >>>>>> } >>>>>> >>>>>> #Relative freqs to construct the graph >>>>>> cab <- unique(cab) >>>>>> d <- matrix(ncol=length(cab)) >>>>>> dci<- d[-1,] >>>>>> dcf <- d[-1,] >>>>>> dti <- d[-1,] >>>>>> dtf <- d[-1,] >>>>>> >>>>>> ######################################## >>>>>> for (i in 1:length(listaA)) { >>>>>> >>>>>> #Relative freq of all data >>>>>> dcc<-listaA[[i]] >>>>>> dcc<-table(factor(dcc$mm, levels=cab)) >>>>>> dci<- rbind(dci, dcc) >>>>>> rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "a") >>>>>> >>>>>> >>>>>> #Relative freq of data with FDR<0.01 >>>>>> dcc1<-listaA[[i]][ifelse(listaA[[i]]$FDR<0.01, TRUE, FALSE),] >>>>>> dcc1<-table(factor(dcc1$mm, levels=cab)) >>>>>> dcf<- rbind(dcf,dcc1) >>>>>> rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = 
FALSE, prefix = "a") >>>>>> } >>>>>> >>>>>> for (i in 1:length(listaB)) { >>>>>> >>>>>> #Relative freq of all data >>>>>> dct<-listaB[[i]] >>>>>> dct<-table(factor(dct$mm, levels=cab)) >>>>>> dti<- rbind(dti, dct) >>>>>> rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "b") >>>>>> >>>>>> >>>>>> #Relative freq of data with FDR<0.01 >>>>>> dct1<-listaB[[i]][ifelse(listaB[[i]]$FDR<0.01, TRUE, FALSE),] >>>>>> dct1<-table(factor(dct1$mm, levels=cab)) >>>>>> dtf<- rbind(dtf,dct1) >>>>>> rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "b") >>>>>> } >>>>>> freq.i<-rbind(dci,dti) >>>>>> freq.f<-rbind(dcf,dtf) >>>>>> freq.rel.i<-freq.i/apply(freq.i,1,sum) >>>>>> freq.rel.f<-freq.f/apply(freq.f,1,sum) >>>>>> >>>>>> >>>>>> freq.i >>>>>># 2 3 >>>>>>#a1 4 1 >>>>>>#a2 4 1 >>>>>>#a3 4 1 >>>>>>#b1 4 1 >>>>>>#b2 4 1 >>>>>>#b3 4 1 >>>>>>#b4 4 1 >>>>>>#result from my code. >>>>>> files<-paste("MSMS_",23,"PepInfo.txt",sep="") >>>>>>read.data<-function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); >>>>>>lapply(x,function(y) read.table(y,header=TRUE,sep = >>>>>>"\t",stringsAsFactors=FALSE,fill=TRUE))} >>>>>>lista<-do.call("c",lapply(list.files(recursive=T)[grep(files,list.files(recursive=T))],read.data)) >>>>>>names(lista)<-paste("group_",gsub("\\d+","",names(lista)),sep="") >>>>>> >>>>>>res2<-split(lista,names(lista)) >>>>>>res3<- lapply(res2,function(x) >>>>>>{names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x}) >>>>>>res4<-lapply(seq_along(res3),function(i) do.call(rbind,lapply(res3[[i]], >>>>>>function(x) table(x$mm[x[["b"]]<0.01])))) >>>>>> names(res4)<- names(res2) >>>>>> >>>>>> >>>>>>res4 >>>>>>$group_a >>>>>># 2 3 >>>>>>#a1 3 1 >>>>>>#a2 3 1 >>>>>>#a3 3 1 >>>>>> >>>>>>#$group_b >>>>>> # 2 3 >>>>>>#b1 3 1 >>>>>>#b2 3 1 >>>>>> >>>>>>#$group_c >>>>>> # 2 3 >>>>>>#c1 3 1 >>>>>> >>>>>>There is a difference in output from freq.i and res4. There were only >>>>>>two files under 'group_b`. So, check your codes. >>>>>>A.K. 
>>>>>> >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> >>>>>>________________________________ >>>>>>From: Vera Costa <veracosta...@gmail.com> >>>>>>To: arun <smartpink...@yahoo.com> >>>>>>Sent: Monday, February 18, 2013 10:27 AM >>>>>>Subject: Re: reading data >>>>>> >>>>>> >>>>>>Hi!!! >>>>>> >>>>>>I'm coming to ask a new question. >>>>>> >>>>>>I want a function to do my statistics. I start with you had send me: >>>>>> >>>>>>z.plot <- function(directory,number) { >>>>>> setwd(directory) >>>>>> indx<-gsub("[./]","",list.dirs()) >>>>>> indx1<- indx[indx!=""] >>>>>> print(indx1) >>>>>> files<-paste("MSMS_",number,"PepInfo.txt",sep="") >>>>>> read.data<-function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); >>>>>>lapply(x,function(y) read.table(y,header=TRUE,sep = >>>>>>"\t",stringsAsFactors=FALSE,fill=TRUE))} >>>>>> >>>>>>lista<-do.call("c",lapply(list.files(recursive=T)[grep(files,list.files(recursive=T))],read.data)) >>>>>> print(lista) >>>>>> #names(lista)<-paste("group_",gsub("\\d+","",names(lista)),sep="") ve = >>>>>>TRUE) >>>>>> } >>>>>>z.plot("C:/Users/Vera Costa/Desktop/dados.lixo",23) >>>>>> >>>>>> >>>>>>In my lista I can´t merge rows to have the group, because the idea is for >>>>>>each file count frequencies of mm, when b<0.01. after that I want a >>>>>>graph like the graph in attach. 
>>>>>> >>>>>> >>>>>>When I had 2 groups and knew the name of the groups, I did the code (but >>>>>>Know I have more groups and, maybe, I don´t know the name of the groups): >>>>>> >>>>>>z.plot <- function(directory,number) { >>>>>> #reading data >>>>>> setwd(directory) >>>>>> direct<-dir(directory,pattern = >>>>>>paste("MSMS_",number,"PepInfo.txt",sep=""), full.names = FALSE, recursive >>>>>>= TRUE) >>>>>> directT <- direct[grepl("^t", direct)] >>>>>> directC <- direct[grepl("^c", direct)] >>>>>> >>>>>> lista<-lapply(direct, function(x) read.table(x,header=TRUE, sep = "\t")) >>>>>> listaC<-lapply(directC, function(x) read.table(x,header=TRUE, sep = >>>>>>"\t")) >>>>>> listaT<-lapply(directT, function(x) read.table(x,header=TRUE, sep = >>>>>>"\t")) >>>>>> >>>>>> #count different z values >>>>>> cab <- vector() >>>>>> for (i in 1:length(lista)) { >>>>>> dc<-lista[[i]][ifelse(lista[[i]]$FDR<0.01, TRUE, FALSE),] >>>>>> dc<-table(dc$z) >>>>>> cab <- c(cab, names(dc)) >>>>>> } >>>>>> >>>>>> #Relative freqs to construct the graph >>>>>> cab <- unique(cab) >>>>>> d <- matrix(ncol=length(cab)) >>>>>> dci<- d[-1,] >>>>>> dcf <- d[-1,] >>>>>> dti <- d[-1,] >>>>>> dtf <- d[-1,] >>>>>> >>>>>> for (i in 1:length(listaC)) { >>>>>> >>>>>> #Relative freq of all data >>>>>> dcc<-listaC[[i]] >>>>>> dcc<-table(factor(dcc$z, levels=cab)) >>>>>> dci<- rbind(dci, dcc) >>>>>> rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "c") >>>>>> >>>>>> >>>>>> #Relative freq of data with FDR<0.01 >>>>>> dcc1<-listaC[[i]][ifelse(listaC[[i]]$FDR<0.01, TRUE, FALSE),] >>>>>> dcc1<-table(factor(dcc1$z, levels=cab)) >>>>>> dcf<- rbind(dcf,dcc1) >>>>>> rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "c") >>>>>> } >>>>>> >>>>>> for (i in 1:length(listaT)) { >>>>>> >>>>>> #Relative freq of all data >>>>>> dct<-listaT[[i]] >>>>>> dct<-table(factor(dct$z, levels=cab)) >>>>>> dti<- rbind(dti, dct) >>>>>> rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "t") 
>>>>>> >>>>>> >>>>>> #Relative freq of data with FDR<0.01 >>>>>> dct1<-listaT[[i]][ifelse(listaT[[i]]$FDR<0.01, TRUE, FALSE),] >>>>>> dct1<-table(factor(dct1$z, levels=cab)) >>>>>> dtf<- rbind(dtf,dct1) >>>>>> rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "t") >>>>>> } >>>>>> freq.i<-rbind(dci,dti) >>>>>> freq.f<-rbind(dcf,dtf) >>>>>> freq.rel.i<-freq.i/apply(freq.i,1,sum) >>>>>> freq.rel.f<-freq.f/apply(freq.f,1,sum) >>>>>> >>>>>>#Graph plot >>>>>>colour<-sample(rainbow(nrow(freq.rel.i))) >>>>>>par(mfrow=c(1,2)) >>>>>>barplot(freq.rel.i,beside=T,main=("Sample"),xlab="Charge",ylab="Relative >>>>>>Frequencies",col=colour,legend.text = rownames(freq.rel.i)) >>>>>>barplot(freq.rel.f,beside=T,main=("Sample with >>>>>>FDR<0.01"),xlab="Charge",ylab="Relative >>>>>>Frequencies",col=colour,legend.text = rownames(freq.rel.f)) >>>>>>#average of the group (except c1&t1) >>>>>>freqs<-rbind(dcf[-1,], dtf[-1,]) >>>>>>average<-apply(freqs,2,mean) >>>>>> >>>>>>#chisquare test function >>>>>>chisq.test<-function(x,y){ >>>>>> somax<-sum(x) >>>>>> somay<-sum(y) >>>>>> nj.<-x+y >>>>>> nj<-sum(nj.) >>>>>> ejx<-(nj./nj)*somax >>>>>> ejy<-(nj./nj)*somay >>>>>> ETx<-((x-ejx)^2)/ejx >>>>>> ETy<-((y-ejy)^2)/ejy >>>>>> ETobs<-sum(ETx)+sum(ETy) >>>>>> pvalue<-1-pchisq(c(ETobs),df=length(x|y)-1,lower.tail=TRUE) >>>>>> return(pvalue) >>>>>> } >>>>>> >>>>>>#pvalues of the chisquare test between sample and average (H0: two >>>>>>samples has the same distribution) >>>>>>pvalues<-c() >>>>>>for (i in 1:(nrow(freqs))){ >>>>>>a<-chisq.test(freqs[i,],average) >>>>>>pvalues<-c(pvalues,a) >>>>>>} >>>>>>#data frame with final p-values >>>>>>dataframe<-data.frame(c(rownames(freqs)), c(pvalues)) >>>>>>colnames(dataframe)<-c("sample name","pvalue") >>>>>>print(dataframe) >>>>>>} >>>>>>z.plot("C:/Users/Vera/Desktop/data",23) >>>>>> >>>>>> >>>>>> >>>>>>Thank you again >>>>>> >>>>>> >>>>>> >>>>>>2013/2/17 arun <smartpink...@yahoo.com> >>>>>> >>>>>>HI Vera, >>>>>>> >>>>>>>No problem. 
I am cc:ing to r-help. >>>>>>> >>>>>>>A.K. >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>>________________________________ >>>>>>>From: Vera Costa <veracosta...@gmail.com> >>>>>>>To: arun <smartpink...@yahoo.com> >>>>>>>Sent: Sunday, February 17, 2013 5:44 AM >>>>>>>Subject: Re: reading data >>>>>>> >>>>>>> >>>>>>> >>>>>>>Hi. Thank you. It works now:-) >>>>>>>And yes, I use windows. >>>>>>>Thank you very much. >>>>>>>No dia 17 de Fev de 2013 00:44, "arun" <smartpink...@yahoo.com> escreveu: >>>>>>> >>>>>>>Hi Vera, >>>>>>>> >>>>>>>>Have you tried the suggestion? >>>>>>>> >>>>>>>>Are you using Windows? >>>>>>>>Thanks, >>>>>>>>Arun >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>>________________________________ >>>>>>>>From: Vera Costa <veracosta...@gmail.com> >>>>>>>>To: arun <smartpink...@yahoo.com> >>>>>>>>Sent: Saturday, February 16, 2013 7:10 PM >>>>>>>>Subject: Re: reading data >>>>>>>> >>>>>>>> >>>>>>>>Thank you. >>>>>>>>In mine, I have an error " 'what' must be a character string or a >>>>>>>>function". >>>>>>>>I need to do equivalent in my system. >>>>>>>>Thank you and sorry one more time. >>>>>>>>No dia 16 de Fev de 2013 23:53, "arun" <smartpink...@yahoo.com> >>>>>>>>escreveu: >>>>>>>> >>>>>>>>Hi, >>>>>>>>>You didn't mention what the error message or whether you are reading >>>>>>>>>file names which are not "mmmmm11kk.txt". >>>>>>>>> >>>>>>>>>It is workiing on my system as I run it again. >>>>>>>>>?c() combine values into a vector or list. 
>>>>>>>>> >>>>>>>>> sessionInfo() >>>>>>>>>R version 2.15.1 (2012-06-22) >>>>>>>>>Platform: x86_64-pc-linux-gnu (64-bit) >>>>>>>>> >>>>>>>>>locale: >>>>>>>>> [1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C >>>>>>>>> [3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8 >>>>>>>>> [5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8 >>>>>>>>> [7] LC_PAPER=C LC_NAME=C >>>>>>>>> [9] LC_ADDRESS=C LC_TELEPHONE=C >>>>>>>>>[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C >>>>>>>>> >>>>>>>>>attached base packages: >>>>>>>>>[1] stats graphics grDevices utils datasets methods base >>>>>>>>> >>>>>>>>> >>>>>>>>>other attached packages: >>>>>>>>>[1] stringr_0.6.2 reshape2_1.2.2 >>>>>>>>> >>>>>>>>>loaded via a namespace (and not attached): >>>>>>>>>[1] plyr_1.8 >>>>>>>>> >>>>>>>>> >>>>>>>>>#code >>>>>>>>> >>>>>>>>> >>>>>>>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) >>>>>>>>> {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) >>>>>>>>>read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})) #it >>>>>>>>>seems like one of the rows of your file doesn't have 6 elements, so >>>>>>>>>added fill=TRUE >>>>>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="") >>>>>>>>>res2<-split(res,names(res)) >>>>>>>>>res3<- lapply(res2,function(x) >>>>>>>>>{names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x}) >>>>>>>>>#result >>>>>>>>> >>>>>>>>>res3 >>>>>>>>>#$group_a >>>>>>>>>#$group_a$a1 >>>>>>>>> Id M mm x b u k j y p v >>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>> >>>>>>>>>$group_a$a2 >>>>>>>>> Id M mm x b u k j y p v >>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>3 
aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>> >>>>>>>>>$group_a$a3 >>>>>>>>> Id M mm x b u k j y p v >>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>> >>>>>>>>> >>>>>>>>>$group_b >>>>>>>>>$group_b$b1 >>>>>>>>> Id M mm x b u k j y p v >>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>> >>>>>>>>>$group_b$b2 >>>>>>>>> Id M mm x b u k j y p v >>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>> >>>>>>>>> >>>>>>>>>$group_c >>>>>>>>>$group_c$c1 >>>>>>>>> Id M mm x b u k j y p v >>>>>>>>>1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>> >>>>>>>>> >>>>>>>>>A.K. 
>>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>>________________________________ >>>>>>>>>From: Vera Costa <veracosta...@gmail.com> >>>>>>>>>To: arun <smartpink...@yahoo.com> >>>>>>>>>Sent: Saturday, February 16, 2013 6:32 PM >>>>>>>>>Subject: Re: reading data >>>>>>>>> >>>>>>>>> >>>>>>>>>Sorry again... In: >>>>>>>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("... >>>>>>>>>What is this c in do.call(c, ...)? When I put this line in R, I get an >>>>>>>>>error. >>>>>>>>>Thank you >>>>>>>>>No dia 15 de Fev de 2013 18:11, "arun" <smartpink...@yahoo.com> >>>>>>>>>escreveu: >>>>>>>>> >>>>>>>>>Hi, >>>>>>>>>>No problem. >>>>>>>>>> >>>>>>>>>>BTW, these questions are not stupid.. >>>>>>>>>>Arun >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>>________________________________ >>>>>>>>>>From: Vera Costa <veracosta...@gmail.com> >>>>>>>>>>To: arun <smartpink...@yahoo.com> >>>>>>>>>>Sent: Friday, February 15, 2013 1:08 PM >>>>>>>>>>Subject: Re: reading data >>>>>>>>>> >>>>>>>>>> >>>>>>>>>>Thank you very much. >>>>>>>>>> >>>>>>>>>>I will try to apply it and afterwards I will tell you if it is ok :-) >>>>>>>>>> >>>>>>>>>>Thank you and sorry about these questions (sometimes stupid questions). >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>>2013/2/15 arun <smartpink...@yahoo.com> >>>>>>>>>> >>>>>>>>>>HI, >>>>>>>>>>>No problem. >>>>>>>>>>>?c() for concatenate to vector or list(). >>>>>>>>>>>If I use do.call(cbind,..) or do.call(rbind,...) 
>>>>>>>>>>> >>>>>>>>>>>do.call(cbind,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) >>>>>>>>>>> {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) >>>>>>>>>>>read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})) >>>>>>>>>>># [,1] [,2] [,3] [,4] [,5] [,6] >>>>>>>>>>>#a1 List,11 List,11 List,11 List,11 List,11 List,11 >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>>do.call(rbind,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) >>>>>>>>>>> {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) >>>>>>>>>>>read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})) >>>>>>>>>>># a1 >>>>>>>>>>>#[1,] List,11 >>>>>>>>>>>#[2,] List,11 >>>>>>>>>>>#[3,] List,11 >>>>>>>>>>>#[4,] List,11 >>>>>>>>>>>#[5,] List,11 >>>>>>>>>>>#[6,] List,11 >>>>>>>>>>>ie. >>>>>>>>>>>list within in a list >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>>restrial<-lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) >>>>>>>>>>> {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) >>>>>>>>>>>read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))}) >>>>>>>>>>> str(restrial) >>>>>>>>>>>#List of 6 >>>>>>>>>>># $ :List of 1 >>>>>>>>>>> #..$ a1:'data.frame': 6 obs. of 11 variables: >>>>>>>>>>> .#. ..$ Id: chr [1:6] "aAA" "aAAAA" "aA" "aAA" ... >>>>>>>>>>> #.. ..$ M : chr [1:6] "1" "1" "2" "1" ... >>>>>>>>>>> #. ..$ mm: int [1:6] 2 2 1 2 3 2 >>>>>>>>>>> #. ..$ x : int [1:6] 739 2263 1 1965 3660 1972 >>>>>>>>>>> ----------------------------------------------------------------- >>>>>>>>>>>str(res) >>>>>>>>>>>#List of 6 >>>>>>>>>>># $ a1:'data.frame': 6 obs. of 11 variables: >>>>>>>>>>> # ..$ Id: chr [1:6] "aAA" "aAAAA" "aA" "aAA" ... >>>>>>>>>>> #..$ M : chr [1:6] "1" "1" "2" "1" ... 
>>>>>>>>>>> # ..$ mm: int [1:6] 2 2 1 2 3 2 >>>>>>>>>>> # ..$ x : int [1:6] 739 2263 1 1965 3660 1972 >>>>>>>>>>>----------------------------------------------------------------- >>>>>>>>>>> >>>>>>>>>>>You mentioned about naming this to "group_a","group_b". etc.. >>>>>>>>>>> >>>>>>>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="") >>>>>>>>>>>res2<-split(res,names(res)) >>>>>>>>>>> >>>>>>>>>>>res3<- lapply(res2,function(x) >>>>>>>>>>>{names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x}) >>>>>>>>>>> res3$group_a >>>>>>>>>>>$a1 >>>>>>>>>>> >>>>>>>>>>># Id M mm x b u k j y p v >>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>> >>>>>>>>>>>#$a2 >>>>>>>>>>> >>>>>>>>>>># Id M mm x b u k j y p v >>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>> >>>>>>>>>>>#$a3 >>>>>>>>>>> >>>>>>>>>>> # Id M mm x b u k j y p v >>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>>A.K. 
>>>>>>>>>>> >>>>>>>>>>>________________________________ >>>>>>>>>>>From: Vera Costa <veracosta...@gmail.com> >>>>>>>>>>>To: arun <smartpink...@yahoo.com> >>>>>>>>>>>Sent: Friday, February 15, 2013 12:39 PM >>>>>>>>>>>Subject: Re: reading data >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>>Thank you very much and sorry for my questions. >>>>>>>>>>> >>>>>>>>>>>But this code isn't grouping by letter, is it? I mean, a1,a2,a3 are >>>>>>>>>>>the same group (the first letter gives me the name of the group). >>>>>>>>>>> >>>>>>>>>>>Another question: in do.call, you did do.call(c,.....). What is c? >>>>>>>>>>> >>>>>>>>>>>Sorry >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>>2013/2/15 arun <smartpink...@yahoo.com> >>>>>>>>>>> >>>>>>>>>>>HI, >>>>>>>>>>>> >>>>>>>>>>>>Just to add: >>>>>>>>>>>> >>>>>>>>>>>> >>>>>>>>>>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) >>>>>>>>>>>> {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) >>>>>>>>>>>>read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})) #it >>>>>>>>>>>>seems like one of the rows of your file doesn't have 6 elements, so >>>>>>>>>>>>added fill=TRUE >>>>>>>>>>>> >>>>>>>>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="") >>>>>>>>>>>>res[grep("group_b",names(res))] >>>>>>>>>>>> >>>>>>>>>>>>I am not sure how you want the grouped data to look.
If you >>>>>>>>>>>>want something like this: >>>>>>>>>>>>res1<-do.call(rbind,res) >>>>>>>>>>>>res2<-lapply(split(res1,gsub("[.0-9]","",row.names(res1))),function(x) >>>>>>>>>>>> {row.names(x)<-1:nrow(x);x}) >>>>>>>>>>>>res2 >>>>>>>>>>>>#$group_a >>>>>>>>>>>> >>>>>>>>>>>> # Id M mm x b u k j y p v >>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>>>#7 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>>#8 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>>#9 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>>#10 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>>>>#11 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>>#12 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>>>#13 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>>#14 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>>#15 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>>#16 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>>>>#17 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>>#18 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>>> >>>>>>>>>>>> >>>>>>>>>>>>#$group_b >>>>>>>>>>>> # Id M mm x b u k j y p v >>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>>>#7 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>>#8 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>>#9 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>>#10 aAA 1 2 1965 0.0007000 4 3 
AR 2 11616 8926 >>>>>>>>>>>>#11 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>>#12 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>>> >>>>>>>>>>>>#$group_c >>>>>>>>>>>> >>>>>>>>>>>> # Id M mm x b u k j y p v >>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>>> >>>>>>>>>>>> >>>>>>>>>>>>#or if you want it like this: >>>>>>>>>>>>res2<-split(res,names(res)) >>>>>>>>>>>> >>>>>>>>>>>>res2[["group_b"]] >>>>>>>>>>>> >>>>>>>>>>>>#$group_b >>>>>>>>>>>># Id M mm x b u k j y p v >>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>>> >>>>>>>>>>>>#$group_b >>>>>>>>>>>> # Id M mm x b u k j y p v >>>>>>>>>>>>#1 aAA 1 2 739 0.1257000 2 2 AA 2 8867 8926 >>>>>>>>>>>>#2 aAAAA 1 2 2263 0.0004000 2 2 AR 4 7640 8926 >>>>>>>>>>>>#3 aA 2 1 1 0.0845435 2 AA 2 6790 734,1092 NA >>>>>>>>>>>>#4 aAA 1 2 1965 0.0007000 4 3 AR 2 11616 8926 >>>>>>>>>>>>#5 aAAA 1 3 3660 0.0008600 18 3 AA 2 20392 496 >>>>>>>>>>>>#6 AA na 2 1972 0.0007000 11 3 AR 25 509 734 >>>>>>>>>>>> >>>>>>>>>>>>Hope this helps. >>>>>>>>>>>> >>>>>>>>>>>>A.K. >>>>>>>>>>>> >>>>>>>>>>>> >>>>>>>>>>>> >>>>>>>>>>>>----- Original Message ----- >>>>>>>>>>>>From: "veracosta...@gmail.com" <veracosta...@gmail.com> >>>>>>>>>>>>To: smartpink...@yahoo.com >>>>>>>>>>>>Cc: >>>>>>>>>>>>Sent: Friday, February 15, 2013 9:15 AM >>>>>>>>>>>>Subject: reading data >>>>>>>>>>>> >>>>>>>>>>>>Hi, >>>>>>>>>>>>I post yesterday and you helped me. 
I have a little problem. >>>>>>>>>>>> >>>>>>>>>>>>First, I have never worked with regular expressions... >>>>>>>>>>>> >>>>>>>>>>>>The code that you gave me is OK, but my files are inside the >>>>>>>>>>>>folders a1,a2,a3. I will try to explain better. >>>>>>>>>>>> >>>>>>>>>>>>I have one folder named "data". Inside this folder I have some >>>>>>>>>>>>other folders named "a1","a2","b1","b2",... and inside each one of >>>>>>>>>>>>them I have some files. I want only the file "mmmmmm.txt" (in all >>>>>>>>>>>>folders I have one file with this name). >>>>>>>>>>>>The name of the folder gives me the name of the group, but I need to >>>>>>>>>>>>read the file inside. And afterwards, have "group_a", >>>>>>>>>>>>"group_b"... because I need to work with this data grouped (and know >>>>>>>>>>>>the name of the group). >>>>>>>>>>>> >>>>>>>>>>>>Thank you. >>>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>> >>>>>>>>> >>>>>>>> >>>>>>> >>>>>> >>>>> >>>> >>> >> > ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.