Re: [R] reading data

arun Tue, 19 Feb 2013 16:33:31 -0800

Hi,
Try this:


# Name of the per-sample result file searched for in every sub-directory.
files <- paste0("MSMS_", 23, "PepInfo.txt")

# Read tab-separated files into a list of data frames.
#   x: character vector of file paths (relative to the working directory).
# Returns a list with one data frame per path; each element is named after
# the part of its path that precedes the last "/".
read.data <- function(x) {
  names(x) <- gsub("^(.*)\\/.*", "\\1", x)
  lapply(x, function(y) {
    read.table(y, header = TRUE, sep = "\t", stringsAsFactors = FALSE,
               fill = TRUE)
  })
}

# List the directory tree once and keep the paths matching `files`.
all.paths <- list.files(recursive = TRUE)
matched.paths <- all.paths[grep(files, all.paths)]
if (length(matched.paths) == 0L) {
  stop("no file matching '", files, "' found below ", getwd(), call. = FALSE)
}
lista <- read.data(matched.paths)

# Group label = directory name with all digits removed ("c1" -> "group_c").
names(lista) <- paste0("group_", gsub("\\d+", "", names(lista)))
res2 <- split(lista, names(lista))

# Inside each group, name the samples <group><running index> (c1, c2, ...).
res3 <- lapply(res2, function(x) {
  names(x) <- paste0(gsub(".*_", "", names(x)), seq_along(x))
  x
})
# Freq whole data
library(reshape2)

# Charge states to tabulate: always 1..3 (as before) plus any further value
# of z that occurs in the data, so larger charges are not silently dropped.
z.levels <- sort(union(1:3, unlist(
  lapply(res3, function(grp) lapply(grp, function(x) x$z)),
  use.names = FALSE
)))

# One long-format count table (columns Var1, Freq) per group; the row names
# carry the sample name followed by ".<n>".
res4 <- lapply(res3, function(grp) {
  do.call(rbind, lapply(grp, function(x) {
    as.data.frame(table(factor(x$z, levels = z.levels)))
  }))
})

# Reshape to one row per sample and one column per charge state.
freq.i1 <- do.call(rbind, lapply(res4, function(x) {
  long <- melt(data.frame(id = gsub("\\..*", "", row.names(x)), x),
               id.vars = c("id", "Var1"))
  dcast(long, id ~ Var1, value.var = "value")
}))
freq.i1
#          id 1  2 3
#group_a   a1 1 12 6
#group_c.1 c1 0 10 3
#group_c.2 c2 0 12 3
#group_c.3 c3 0 13 4
#group_t.1 t1 0 10 4
#group_t.2 t2 1 12 6

# Relative frequencies per sample: drop the id column, then divide every
# row of counts by that row's total.
freq.rel.i1 <- local({
  counts <- freq.i1[, -1]
  as.matrix(counts / rowSums(counts))
})
freq.rel.i1
 #                  1         2         3
#group_a   0.05263158 0.6315789 0.3157895
#group_c.1 0.00000000 0.7692308 0.2307692
#group_c.2 0.00000000 0.8000000 0.2000000
#group_c.3 0.00000000 0.7647059 0.2352941
#group_t.1 0.00000000 0.7142857 0.2857143
#group_t.2 0.05263158 0.6315789 0.3157895



# Freq with FDR < 0.01

# Charge states to tabulate: always 1..3 (as before) plus any further value
# of z that occurs in the data, so larger charges are not silently dropped.
z.levels <- sort(union(1:3, unlist(
  lapply(res3, function(grp) lapply(grp, function(x) x$z)),
  use.names = FALSE
)))

# Same tabulation as for the whole data, restricted to rows with FDR < 0.01.
res5 <- lapply(res3, function(grp) {
  do.call(rbind, lapply(grp, function(x) {
    as.data.frame(table(factor(x$z[x[["FDR"]] < 0.01], levels = z.levels)))
  }))
})

# Reshape to one row per sample and one column per charge state
# (melt/dcast come from reshape2, which must already be attached).
freq.f1 <- do.call(rbind, lapply(res5, function(x) {
  long <- melt(data.frame(id = gsub("\\..*", "", row.names(x)), x),
               id.vars = c("id", "Var1"))
  dcast(long, id ~ Var1, value.var = "value")
}))

freq.f1
 #         id 1  2 3
#group_a   a1 1 10 5
#group_c.1 c1 0  7 2
#group_c.2 c2 0  8 2
#group_c.3 c3 0  6 4
#group_t.1 t1 0  7 4
#group_t.2 t2 1 10 5


# Relative frequencies per sample for the FDR-filtered counts: drop the id
# column, then divide every row of counts by that row's total.
freq.rel.f1 <- local({
  counts <- freq.f1[, -1]
  as.matrix(counts / rowSums(counts))
})

# One colour per sample (row), taken from the rainbow palette in random order.
colour <- sample(rainbow(nrow(freq.rel.i1)))

# Two panels side by side; keep the previous layout so it can be restored.
old.par <- par(mfrow = c(1, 2))
barplot(freq.rel.i1, beside = TRUE, main = "Sample", xlab = "Charge",
        ylab = "Relative Frequencies", col = colour,
        legend.text = rownames(freq.rel.i1))
barplot(freq.rel.f1, beside = TRUE, main = "Sample with FDR<0.01",
        xlab = "Charge", ylab = "Relative Frequencies", col = colour,
        legend.text = rownames(freq.rel.f1))
par(old.par)
# TODO: change the legend position (see barplot's args.legend argument)

Also, I didn't check the rest of the code, from the chi-square test onwards.
A.K.
________________________________
From: Vera Costa <veracosta...@gmail.com>
To: arun <smartpink...@yahoo.com> 
Sent: Tuesday, February 19, 2013 4:19 PM
Subject: Re: reading data


Here is the code and some outputs.

# Compare charge-state (z) distributions between samples.
#
# Reads every file matching MSMS_<number>PepInfo.txt found recursively under
# `directory`, tabulates column z per sample (all rows, and rows with
# FDR < 0.01), prints the count and relative-frequency tables, draws both sets
# of relative frequencies as grouped bar plots, and finally tests every sample
# except the first one of each group against the average of those samples
# with a chi-square homogeneity test.
#
# Args:
#   directory: folder whose sub-folders (c1, c2, t1, ...) contain the files.
#   number:    run number pasted into the file name.
#   groups:    sub-folder name prefixes, one per group, in output order.
#
# Returns (invisibly) a data frame with columns "sample name" and "pvalue".
z.plot <- function(directory, number, groups = c("c", "t")) {
  # ---- reading data ----
  # Paths are resolved against `directory` instead of calling setwd(), so the
  # caller's working directory is left alone and relative paths work.
  file_pattern <- paste0("MSMS_", number, "PepInfo.txt")
  direct <- dir(directory, pattern = file_pattern, full.names = FALSE,
                recursive = TRUE)
  if (length(direct) == 0L) {
    stop("no file matching '", file_pattern, "' found under ", directory,
         call. = FALSE)
  }
  lista <- lapply(direct, function(path) {
    read.table(file.path(directory, path), header = TRUE, sep = "\t")
  })

  # Rows passing the FDR cut-off; which() also discards rows whose FDR is NA.
  keep_fdr <- function(d) d[which(d$FDR < 0.01), , drop = FALSE]

  # ---- count different z values ----
  # Levels are the z values seen among FDR-filtered rows of ALL matched files,
  # in order of first appearance.
  cab <- unique(unlist(lapply(lista, function(d) {
    names(table(keep_fdr(d)$z))
  })))
  if (length(cab) == 0L) {
    stop("no row with FDR < 0.01 in any file", call. = FALSE)
  }
  print(cab)
  # Example output (test data from this thread):
  # [1] "2" "3" "1"

  # ---- counts per sample ----
  # Count matrix for one group: one row per sample (named <prefix><index>),
  # one column per level in `cab`.
  count_table <- function(samples, prefix, fdr_only) {
    rows <- lapply(samples, function(d) {
      if (fdr_only) {
        d <- keep_fdr(d)
      }
      table(factor(d$z, levels = cab))
    })
    counts <- do.call(rbind, rows)
    rownames(counts) <- paste0(prefix, seq_along(samples))
    counts
  }

  per_group <- lapply(groups, function(g) {
    samples <- lista[substr(direct, 1L, nchar(g)) == g]
    if (length(samples) == 0L) {
      return(NULL)  # no sub-folder with this prefix
    }
    list(all = count_table(samples, g, FALSE),
         fdr = count_table(samples, g, TRUE))
  })
  per_group <- Filter(Negate(is.null), per_group)
  if (length(per_group) == 0L) {
    stop("no files found for groups: ", paste(groups, collapse = ", "),
         call. = FALSE)
  }

  # ---- relative freqs to construct the graph ----
  freq.i <- do.call(rbind, lapply(per_group, function(p) p$all))
  freq.f <- do.call(rbind, lapply(per_group, function(p) p$fdr))
  freq.rel.i <- freq.i / rowSums(freq.i)
  freq.rel.f <- freq.f / rowSums(freq.f)

  print(freq.i)
  #     2 3 1
  # c1 10 3 0
  # c2 12 3 0
  # c3 13 4 0
  # t1 10 4 0
  # t2 12 6 1

  print(freq.f)
  #     2 3 1
  # c1  7 2 0
  # c2  8 2 0
  # c3  6 4 0
  # t1  7 4 0
  # t2 10 5 1

  print(freq.rel.i)
  #            2         3          1
  # c1 0.7692308 0.2307692 0.00000000
  # c2 0.8000000 0.2000000 0.00000000
  # c3 0.7647059 0.2352941 0.00000000
  # t1 0.7142857 0.2857143 0.00000000
  # t2 0.6315789 0.3157895 0.05263158

  print(freq.rel.f)
  #            2         3      1
  # c1 0.7777778 0.2222222 0.0000
  # c2 0.8000000 0.2000000 0.0000
  # c3 0.6000000 0.4000000 0.0000
  # t1 0.6363636 0.3636364 0.0000
  # t2 0.6250000 0.3125000 0.0625

  # ---- graph plot ----
  colour <- sample(rainbow(nrow(freq.rel.i)))
  old_par <- par(mfrow = c(1, 2))
  on.exit(par(old_par), add = TRUE)  # restore the caller's plot layout
  barplot(freq.rel.i, beside = TRUE, main = "Sample", xlab = "Charge",
          ylab = "Relative Frequencies", col = colour,
          legend.text = rownames(freq.rel.i))
  barplot(freq.rel.f, beside = TRUE, main = "Sample with FDR<0.01",
          xlab = "Charge", ylab = "Relative Frequencies", col = colour,
          legend.text = rownames(freq.rel.f))

  # ---- average of the group (except c1 & t1) ----
  # drop = FALSE keeps a one-row result as a matrix, so a group with only two
  # samples keeps its remaining row name.
  freqs <- do.call(rbind, lapply(per_group, function(p) {
    p$fdr[-1, , drop = FALSE]
  }))
  average <- colMeans(freqs)
  print(average)
  #         2         3         1
  # 8.0000000 3.6666667 0.3333333

  # ---- chi-square test function ----
  # P-value of the chi-square homogeneity test between two count vectors
  # (H0: both come from the same distribution). Categories that are empty in
  # both vectors are left out; NA is returned when no test is possible.
  # Named differently from stats::chisq.test so that it does not mask it.
  homogeneity_pvalue <- function(x, y) {
    used <- (x + y) > 0
    x <- x[used]
    y <- y[used]
    if (length(x) < 2L || sum(x) == 0 || sum(y) == 0) {
      return(NA_real_)
    }
    pooled <- (x + y) / sum(x + y)
    expected_x <- pooled * sum(x)
    expected_y <- pooled * sum(y)
    statistic <- sum((x - expected_x)^2 / expected_x) +
      sum((y - expected_y)^2 / expected_y)
    pchisq(statistic, df = length(x) - 1L, lower.tail = FALSE)
  }

  # p-values of the chi-square test between each sample and the average
  # (H0: the two samples have the same distribution)
  pvalues <- vapply(seq_len(nrow(freqs)), function(i) {
    homogeneity_pvalue(freqs[i, ], average)
  }, numeric(1))

  # ---- data frame with final p-values ----
  dataframe <- data.frame(as.character(rownames(freqs)), pvalues)
  colnames(dataframe) <- c("sample name", "pvalue")
  print(dataframe)
  # Output of the original code on the thread's test data:
  #   sample name    pvalue
  # 1          c2 0.7235907
  # 2          c3 0.7963287
  # 3             0.9079200
  # (row 3 is sample t2; the original lost its name through dimension
  # dropping, this version labels it)
  invisible(dataframe)
}
z.plot("C:/Users/Vera Costa/Desktop/dados",23)

###and two barplots..


Here, I remove the group a1.

Thank you



2013/2/19 arun <smartpink...@yahoo.com>

Hi,
>
>Could you send the results for the folder that was sent to me?  It will be 
>easy for me.
>
>Arun
>
>
>
>
>
>
>________________________________
>From: Vera Costa <veracosta...@gmail.com>
>To: arun <smartpink...@yahoo.com>
>Sent: Tuesday, February 19, 2013 3:47 PM
>
>Subject: Re: reading data
>
>
>Oh sorry, I change the folder.
>
>I send for your folder
>
>
>
>2013/2/19 arun <smartpink...@yahoo.com>
>
>Hello,
>>
>>
>>  Regarding the results, is it from the same folder that you sent to me??
>>I am getting different results by running your steps.
>>
>>
>>direct<- list.files(recursive=TRUE)
>>  direct
>>#[1] "a1/MSMS_23PepInfo.txt" "c1/MSMS_23PepInfo.txt" "c2/MSMS_23PepInfo.txt"
>>#[4] "c3/MSMS_23PepInfo.txt" "t1/MSMS_23PepInfo.txt" "t2/MSMS_23PepInfo.txt"
>>
>> directT<- list.files(recursive=TRUE)[grepl("^t",dir())]
>>
>>directT
>>#[1] "t1/MSMS_23PepInfo.txt" "t2/MSMS_23PepInfo.txt"
>>
>>
>>directC<- list.files(recursive=TRUE)[grepl("^c",dir())]
>>
>>directC
>>#[1] "c1/MSMS_23PepInfo.txt" "c2/MSMS_23PepInfo.txt" "c3/MSMS_23PepInfo.txt"
>>
>>
>>
>>lista<- lapply(direct,function(x) 
>>read.table(x,header=TRUE,stringsAsFactors=FALSE,sep="\t",fill=TRUE))
>> 
>>listaT<-lapply(directT, function(x) read.table(x,header=TRUE, sep = 
>>"\t",fill=TRUE))
>>listaC<-lapply(directC, function(x) read.table(x,header=TRUE, sep = 
>>"\t",fill=TRUE))
>>
>> #count different z values
>> cab <- vector()
>>    for (i in 1:length(lista)) {
>>         dc<-lista[[i]][ifelse(lista[[i]]$FDR<0.01, TRUE, FALSE),]
>>        dc<-table(dc$z)
>>        cab <- c(cab, names(dc))
>>  }
>> 
>> #Relative freqs to construct the graph
>>    cab <- unique(cab)
>> print(cab)
>>
>>#[1] "1" "2" "3"  #Here results are not correct
>>
>>
>>d <- matrix(ncol=length(cab))
>> dci<- d[-1,]
>>    dcf <- d[-1,]
>> dti <- d[-1,]
>> dtf <- d[-1,]
>>
>>    for (i in 1:length(listaC)) {
>>
>>  #Relative freq of all data
>>  dcc<-listaC[[i]]
>>  dcc<-table(factor(dcc$z, levels=cab))
>>  dci<- rbind(dci, dcc)
>>  rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "c")
>>
>>
>>  #Relative freq of data with FDR<0.01
>>  dcc1<-listaC[[i]][ifelse(listaC[[i]]$FDR<0.01, TRUE, FALSE),]
>>  dcc1<-table(factor(dcc1$z, levels=cab))
>>  dcf<- rbind(dcf,dcc1)
>>  rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "c")
>>        }
>> print(dci) #here too.
>>
>>#   1  2 3
>>#c1 0 10 3
>>#c2 0 12 3
>>#c3 0 13 4
>>
>>
>>It is important to clear this before I make any changes to the script.  You 
>>need to send me the output of the same data folder to understand what is 
>>going on.
>>
>>
>>Arun
>>________________________________
>>From: Vera Costa <veracosta...@gmail.com>
>>To: arun <smartpink...@yahoo.com>
>>Sent: Tuesday, February 19, 2013 9:24 AM
>>
>>Subject: Re: reading data
>>
>>
>>Ok.
>>
>>Here is the code and some outputs.
>>
>>z.plot <- function(directory,number) {
>> #reading data
>>  setwd(directory)
>> direct<-dir(directory,pattern = paste("MSMS_",number,"PepInfo.txt",sep=""), 
>>full.names = FALSE, recursive = TRUE)
>> directT <- direct[grepl("^t", direct)]
>> directC <- direct[grepl("^c", direct)]
>>
>> lista<-lapply(direct, function(x) read.table(x,header=TRUE, sep = "\t"))
>> listaC<-lapply(directC, function(x) read.table(x,header=TRUE, sep = "\t"))
>> listaT<-lapply(directT, function(x) read.table(x,header=TRUE, sep = "\t"))
>>
>> #count different z values
>> cab <- vector()
>>    for (i in 1:length(lista)) {
>>         dc<-lista[[i]][ifelse(lista[[i]]$FDR<0.01, TRUE, FALSE),]
>>        dc<-table(dc$z)
>>        cab <- c(cab, names(dc))
>>  }
>>
>> #Relative freqs to construct the graph
>>    cab <- unique(cab)
>> print(cab)
>>
>>###[1] "1" "2" "3" "4" "5"
>>
>>
>>
>>    d <- matrix(ncol=length(cab))
>> dci<- d[-1,]
>>    dcf <- d[-1,]
>> dti <- d[-1,]
>> dtf <- d[-1,]
>>
>>    for (i in 1:length(listaC)) {
>>
>>  #Relative freq of all data
>>  dcc<-listaC[[i]]
>>  dcc<-table(factor(dcc$z, levels=cab))
>>  dci<- rbind(dci, dcc)
>>  rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "c")
>>
>>
>>  #Relative freq of data with FDR<0.01
>>  dcc1<-listaC[[i]][ifelse(listaC[[i]]$FDR<0.01, TRUE, FALSE),]
>>  dcc1<-table(factor(dcc1$z, levels=cab))
>>  dcf<- rbind(dcf,dcc1)
>>  rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "c")
>>        }
>> print(dci)
>>
>>###     1     2    3   4  5
>>#c1  93  8356 3621 450 55
>>#c2 108 13513 6859 793 73
>>#c3  97 13526 6724 739 82
>>#c4 101 13417 6574 761 62
>>
>> print(dcf)
>>
>>###    1    2    3   4  5
>>#c1 10 4576 2100 199 17
>>#c2  7 7831 4039 314 23
>>#c3 16 7887 4087 286 22
>>#c4 20 7824 4045 311 20
>>
>> for (i in 1:length(listaT)) {
>>
>>  #Relative freq of all data
>>  dct<-listaT[[i]]
>>  dct<-table(factor(dct$z, levels=cab))
>>  dti<- rbind(dti, dct)
>>  rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "t")
>>
>>
>>  #Relative freq of data with FDR<0.01
>>  dct1<-listaT[[i]][ifelse(listaT[[i]]$FDR<0.01, TRUE, FALSE),]
>>  dct1<-table(factor(dct1$z, levels=cab))
>>  dtf<- rbind(dtf,dct1)
>>  rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "t")
>>        }
>>
>> print(dti)
>>
>>###     1     2    3   4  5
>>#t1  32  8640 4098 429 36
>>#t2 128 13209 6723 788 75
>>#t3  85 13043 6691 754 82
>>#t4 139 13750 7036 807 84
>>
>> print(dtf)
>>
>>
>>####    1    2    3   4  5
>>#t1  5 4885 2571 196  8
>>#t2 12 7752 4209 360 28
>>#t3 19 7563 4086 336 18
>>#t4 14 8108 4218 312 26
>>
>>
>>  freq.i<-rbind(dci,dti)
>>  freq.f<-rbind(dcf,dtf)
>>  freq.rel.i<-freq.i/apply(freq.i,1,sum)
>>  freq.rel.f<-freq.f/apply(freq.f,1,sum) 
>> print(freq.i)
>>##     1     2    3   4  5
>>#c1  93  8356 3621 450 55
>>#c2 108 13513 6859 793 73
>>#c3  97 13526 6724 739 82
>>#c4 101 13417 6574 761 62
>>#t1  32  8640 4098 429 36
>>#t2 128 13209 6723 788 75
>>#t3  85 13043 6691 754 82
>>#t4 139 13750 7036 807 84
>>
>> print(freq.f)
>>  ###  1    2    3   4  5
>>#c1 10 4576 2100 199 17
>>#c2  7 7831 4039 314 23
>>#c3 16 7887 4087 286 22
>>#c4 20 7824 4045 311 20
>>#t1  5 4885 2571 196  8
>>#t2 12 7752 4209 360 28
>>#t3 19 7563 4086 336 18
>>#t4 14 8108 4218 312 26
>>
>> print(freq.rel.i)
>>###             1         2         3          4           5
>>#c1 0.007395626 0.6644930 0.2879523 0.03578529 0.004373757
>>#c2 0.005059496 0.6330460 0.3213248 0.03714982 0.003419844
>>#c3 0.004582389 0.6389834 0.3176493 0.03491119 0.003873772
>>#c4 0.004829070 0.6415013 0.3143199 0.03638537 0.002964380
>>#t1 0.002417832 0.6528145 0.3096335 0.03241405 0.002720060
>>#t2 0.006117670 0.6313148 0.3213210 0.03766190 0.003584572
>>#t3 0.004115226 0.6314694 0.3239409 0.03650448 0.003969983
>>#t4 0.006371470 0.6302714 0.3225156 0.03699120 0.003850385
>> print(freq.rel.f)
>>
>>###              1         2         3          4           5
>>#c1 0.0014488554 0.6629962 0.3042596 0.02883222 0.002463054
>>#c2 0.0005731128 0.6411495 0.3306861 0.02570820 0.001883085
>>#c3 0.0013010246 0.6413238 0.3323305 0.02325581 0.001788909
>>#c4 0.0016366612 0.6402619 0.3310147 0.02545008 0.001636661
>>#t1 0.0006523157 0.6373125 0.3354207 0.02557078 0.001043705
>>#t2 0.0009707952 0.6271337 0.3405064 0.02912386 0.002265189
>>#t3 0.0015804359 0.6290967 0.3398769 0.02794876 0.001497255
>>#t4 0.0011042751 0.6395330 0.3327023 0.02460956 0.002050797
>>
>>#Graph plot
>>colour<-sample(rainbow(nrow(freq.rel.i)))
>>par(mfrow=c(1,2))
>>barplot(freq.rel.i,beside=T,main=("Sample"),xlab="Charge",ylab="Relative 
>>Frequencies",col=colour,legend.text = rownames(freq.rel.i))
>>barplot(freq.rel.f,beside=T,main=("Sample with 
>>FDR<0.01"),xlab="Charge",ylab="Relative Frequencies",col=colour,legend.text = 
>>rownames(freq.rel.f))
>>
>>#average of the group (except c1&t1)
>>freqs<-rbind(dcf[-1,], dtf[-1,])
>>average<-apply(freqs,2,mean)
>>print(average)
>>
>>###         1          2          3          4          5
>> # 14.66667 7827.50000 4114.00000  319.83333   22.83333
>>
>>#chisquare test function
>>chisq.test<-function(x,y){
>> somax<-sum(x)
>> somay<-sum(y)
>> nj.<-x+y
>> nj<-sum(nj.)
>> ejx<-(nj./nj)*somax
>> ejy<-(nj./nj)*somay
>> ETx<-((x-ejx)^2)/ejx
>> ETy<-((y-ejy)^2)/ejy
>> ETobs<-sum(ETx)+sum(ETy)
>> pvalue<-1-pchisq(c(ETobs),df=length(x|y)-1,lower.tail=TRUE)
>> return(pvalue)
>> }
>>
>>#pvalues of the chisquare test between sample and average (H0: two samples 
>>has the same distribution)
>>pvalues<-c()
>>for (i in 1:(nrow(freqs))){
>>a<-chisq.test(freqs[i,],average)
>>pvalues<-c(pvalues,a)
>>}
>>print(pvalues)
>>##[1] 0.5307206 0.6849480 0.8332661 0.3474956 0.5546527 0.9387602
>>
>>#data frame with final p-values
>>dataframe<-data.frame(c(rownames(freqs)), c(pvalues))
>>colnames(dataframe)<-c("sample name","pvalue")
>>print(dataframe)
>>
>>###  sample name    pvalue
>>#1          c2 0.5307206
>>#2          c3 0.6849480
>>#3          c4 0.8332661
>>#4          t2 0.3474956
>>#5          t3 0.5546527
>>#6          t4 0.9387602
>>}
>>z.plot("C:/Users/Vera Costa/Desktop/dados",23)
>>
>>###and two barplots...
>>
>>Thank you
>>
>>
>>
>>
>>2013/2/19 arun <smartpink...@yahoo.com>
>>
>>Got it.
>>>
>>>So, if I run your codes that you sent yesterday, will I get the correct 
>>>results for relative frequency etc.  It would be also great if you can sent 
>>>me the output generated using your codes (on two groups as you showed 
>>>yesterday).  It will help me in checking results much faster than running 
>>>your code and see if that is the result (because I have to do some 
>>>adjustment to your code for running in linux especially the ?dir()). 
>>>
>>>I may be able to run it only later.
>>>
>>>Arun
>>>
>>>
>>>
>>>
>>>
>>>
>>>________________________________
>>>From: Vera Costa <veracosta...@gmail.com>
>>>To: arun <smartpink...@yahoo.com>
>>>Sent: Tuesday, February 19, 2013 8:53 AM
>>>
>>>Subject: Re: reading data
>>>
>>>
>>>I sent in second email.
>>>
>>>But I send again.
>>>
>>>
>>>
>>>2013/2/19 arun <smartpink...@yahoo.com>
>>>
>>>
>>>>
>>>>Your attachment didn't come through.
>>>>
>>>>Arun
>>>>
>>>>
>>>>
>>>>
>>>>________________________________
>>>>From: Vera Costa <veracosta...@gmail.com>
>>>>To: arun <smartpink...@yahoo.com>
>>>>Sent: Tuesday, February 19, 2013 8:47 AM
>>>>
>>>>Subject: Re: reading data
>>>>
>>>>
>>>>Sorry about a lot of questions.
>>>>
>>>>I attach a small part of my real data (I have a lot of row).
>>>>
>>>>My main objective is construct two graph. The first with the relative 
>>>>frequencies of each group (c1,c2,c3....). The second with the same 
>>>>frequencies but with FDR<0.01.
>>>>
>>>>After that I need to do the average in each group (but without the first 
>>>>group-c1,t1,a1....) and do the qui square test to see if the groups has the 
>>>>same distribution. You understand?
>>>>
>>>>At first, I had only two groups, and I did the code that I sent you. But I 
>>>>need a general code, not for two groups that I know the names, but for all 
>>>>groups (sometimes I can have 7 or 8 or 9 groups).
>>>>
>>>>Is my explanation better now? :-)
>>>>My English isn't also very good :-)
>>>>
>>>>Please not publish this data in forum...
>>>>
>>>>Thank you
>>>>
>>>>
>>>>
>>>>
>>>>2013/2/18 arun <smartpink...@yahoo.com>
>>>>
>>>>Hi,
>>>>>
>>>>>I run the codes to understand what was going on. 
>>>>>
>>>>>I didn't fully understand it as you constructed the codes for your 
>>>>>original dataset and not for the 'data` directory you sent to me.
>>>>>
>>>>>A.K.
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>________________________________
>>>>>From: Vera Costa <veracosta...@gmail.com>
>>>>>To: arun <smartpink...@yahoo.com>
>>>>>Sent: Monday, February 18, 2013 4:02 PM
>>>>>
>>>>>Subject: Re: reading data
>>>>>
>>>>>
>>>>>Thank you.
>>>>>I don't need the same,but equivalent. I will try your suggestions.
>>>>>Thank you.
>>>>>No dia 18 de Fev de 2013 19:41, "arun" <smartpink...@yahoo.com> escreveu:
>>>>>
>>>>>Hi,
>>>>>>I am not able to open your graph.  I am using linux.
>>>>>>
>>>>>>Also, the codes in the function are not reproducible
>>>>>> directT <- direct[grepl("^t", direct)]
>>>>>> directC <- direct[grepl("^c", direct)]
>>>>>>
>>>>>>It takes double the time to know what is going on.
>>>>>>
>>>>>>dir()
>>>>>>#[1] "a1" "a2" "a3" "b1" "b2" "c1"
>>>>>>
>>>>>>direct<- list.files(recursive=TRUE)[grepl("^a|^b",dir())]
>>>>>>
>>>>>> direct
>>>>>>#[1] "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt"
>>>>>>#[4] "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt"
>>>>>>directA<- list.files(recursive=TRUE)[grepl("^a",dir())]
>>>>>>directB<- list.files(recursive=TRUE)[grepl("^b",dir())]
>>>>>>lista<- lapply(direct,function(x) 
>>>>>>read.table(x,header=TRUE,stringsAsFactors=FALSE,sep="\t",fill=TRUE))
>>>>>>
>>>>>>listaA<-lapply(directA, function(x) read.table(x,header=TRUE, sep = 
>>>>>>"\t",fill=TRUE))
>>>>>>listaB<-lapply(directB, function(x) read.table(x,header=TRUE, sep = 
>>>>>>"\t",fill=TRUE))
>>>>>>
>>>>>>#here I am changing the names listaT, z, etc..
>>>>>>
>>>>>>#count different mm values
>>>>>> cab <- vector()
>>>>>>    for (i in 1:length(lista)) {
>>>>>>         dc<-lista[[i]][ifelse(lista[[i]]$b<0.01, TRUE, FALSE),]
>>>>>>        dc<-table(dc$mm)
>>>>>>        cab <- c(cab, names(dc))
>>>>>>  }
>>>>>>
>>>>>> #Relative freqs to construct the graph
>>>>>>    cab <- unique(cab)
>>>>>>    d <- matrix(ncol=length(cab))
>>>>>> dci<- d[-1,]
>>>>>>    dcf <- d[-1,]
>>>>>> dti <- d[-1,]
>>>>>> dtf <- d[-1,]
>>>>>>
>>>>>>    ########################################
>>>>>> for (i in 1:length(listaA)) {
>>>>>>
>>>>>>  #Relative freq of all data
>>>>>>  dcc<-listaA[[i]]
>>>>>>  dcc<-table(factor(dcc$mm, levels=cab))
>>>>>>  dci<- rbind(dci, dcc)
>>>>>>  rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "a")
>>>>>>
>>>>>>
>>>>>>  #Relative freq of data with FDR<0.01
>>>>>>  dcc1<-listaA[[i]][ifelse(listaA[[i]]$FDR<0.01, TRUE, FALSE),]
>>>>>>  dcc1<-table(factor(dcc1$mm, levels=cab))
>>>>>>  dcf<- rbind(dcf,dcc1)
>>>>>>  rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "a")
>>>>>>        }
>>>>>>
>>>>>> for (i in 1:length(listaB)) {
>>>>>>
>>>>>>  #Relative freq of all data
>>>>>>  dct<-listaB[[i]]
>>>>>>  dct<-table(factor(dct$mm, levels=cab))
>>>>>>  dti<- rbind(dti, dct)
>>>>>>  rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "b")
>>>>>>
>>>>>>
>>>>>>  #Relative freq of data with FDR<0.01
>>>>>>  dct1<-listaB[[i]][ifelse(listaB[[i]]$FDR<0.01, TRUE, FALSE),]
>>>>>>  dct1<-table(factor(dct1$mm, levels=cab))
>>>>>>  dtf<- rbind(dtf,dct1)
>>>>>>  rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "b")
>>>>>>        }
>>>>>>  freq.i<-rbind(dci,dti)
>>>>>>  freq.f<-rbind(dcf,dtf)
>>>>>>  freq.rel.i<-freq.i/apply(freq.i,1,sum)
>>>>>>  freq.rel.f<-freq.f/apply(freq.f,1,sum)
>>>>>>
>>>>>>
>>>>>> freq.i
>>>>>>#   2 3
>>>>>>#a1 4 1
>>>>>>#a2 4 1
>>>>>>#a3 4 1
>>>>>>#b1 4 1
>>>>>>#b2 4 1
>>>>>>#b3 4 1
>>>>>>#b4 4 1
>>>>>>#result from my code.  
>>>>>> files<-paste("MSMS_",23,"PepInfo.txt",sep="")
>>>>>>read.data<-function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); 
>>>>>>lapply(x,function(y) read.table(y,header=TRUE,sep = 
>>>>>>"\t",stringsAsFactors=FALSE,fill=TRUE))}
>>>>>>lista<-do.call("c",lapply(list.files(recursive=T)[grep(files,list.files(recursive=T))],read.data))
>>>>>>names(lista)<-paste("group_",gsub("\\d+","",names(lista)),sep="")
>>>>>>
>>>>>>res2<-split(lista,names(lista))
>>>>>>res3<- lapply(res2,function(x) 
>>>>>>{names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x})
>>>>>>res4<-lapply(seq_along(res3),function(i) do.call(rbind,lapply(res3[[i]], 
>>>>>>function(x) table(x$mm[x[["b"]]<0.01]))))
>>>>>> names(res4)<- names(res2)
>>>>>>
>>>>>>
>>>>>>res4
>>>>>>$group_a
>>>>>>#   2 3
>>>>>>#a1 3 1
>>>>>>#a2 3 1
>>>>>>#a3 3 1
>>>>>>
>>>>>>#$group_b
>>>>>> #  2 3
>>>>>>#b1 3 1
>>>>>>#b2 3 1
>>>>>>
>>>>>>#$group_c
>>>>>> #  2 3
>>>>>>#c1 3 1
>>>>>>
>>>>>>There is a difference in output from freq.i and res4.  There were only 
>>>>>>two files under 'group_b`.  So, check your codes.
>>>>>>A.K.
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>________________________________
>>>>>>From: Vera Costa <veracosta...@gmail.com>
>>>>>>To: arun <smartpink...@yahoo.com>
>>>>>>Sent: Monday, February 18, 2013 10:27 AM
>>>>>>Subject: Re: reading data
>>>>>>
>>>>>>
>>>>>>Hi!!!
>>>>>>
>>>>>>I'm coming to ask a new question.
>>>>>>
>>>>>>I want a function to do my statistics. I start with you had send me:
>>>>>>
>>>>>>z.plot <- function(directory,number) {
>>>>>>  setwd(directory)
>>>>>> indx<-gsub("[./]","",list.dirs())
>>>>>> indx1<- indx[indx!=""]
>>>>>> print(indx1)
>>>>>> files<-paste("MSMS_",number,"PepInfo.txt",sep="")
>>>>>> read.data<-function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); 
>>>>>>lapply(x,function(y) read.table(y,header=TRUE,sep = 
>>>>>>"\t",stringsAsFactors=FALSE,fill=TRUE))}
>>>>>> 
>>>>>>lista<-do.call("c",lapply(list.files(recursive=T)[grep(files,list.files(recursive=T))],read.data))
>>>>>> print(lista)
>>>>>> #names(lista)<-paste("group_",gsub("\\d+","",names(lista)),sep="") ve = 
>>>>>>TRUE)
>>>>>> }
>>>>>>z.plot("C:/Users/Vera Costa/Desktop/dados.lixo",23)
>>>>>>
>>>>>>
>>>>>>In my lista I can´t merge rows to have the group, because the idea is for 
>>>>>>each file count  frequencies of mm, when b<0.01. after that I want a 
>>>>>>graph like the graph in attach.
>>>>>>
>>>>>>
>>>>>>When I had 2 groups and knew the names of the groups, I wrote the code below (but 
>>>>>>now I have more groups and, maybe, I don't know the names of the groups):
>>>>>>
>>>>>>z.plot <- function(directory,number) {
>>>>>> #reading data
>>>>>>  setwd(directory)
>>>>>> direct<-dir(directory,pattern = 
>>>>>>paste("MSMS_",number,"PepInfo.txt",sep=""), full.names = FALSE, recursive 
>>>>>>= TRUE)
>>>>>> directT <- direct[grepl("^t", direct)]
>>>>>> directC <- direct[grepl("^c", direct)]
>>>>>>
>>>>>> lista<-lapply(direct, function(x) read.table(x,header=TRUE, sep = "\t"))
>>>>>> listaC<-lapply(directC, function(x) read.table(x,header=TRUE, sep = 
>>>>>>"\t"))
>>>>>> listaT<-lapply(directT, function(x) read.table(x,header=TRUE, sep = 
>>>>>>"\t"))
>>>>>>
>>>>>> #count different z values
>>>>>> cab <- vector()
>>>>>>    for (i in 1:length(lista)) {
>>>>>>         dc<-lista[[i]][ifelse(lista[[i]]$FDR<0.01, TRUE, FALSE),]
>>>>>>        dc<-table(dc$z)
>>>>>>        cab <- c(cab, names(dc))
>>>>>>  }
>>>>>>
>>>>>> #Relative freqs to construct the graph
>>>>>>    cab <- unique(cab)
>>>>>>    d <- matrix(ncol=length(cab))
>>>>>> dci<- d[-1,]
>>>>>>    dcf <- d[-1,]
>>>>>> dti <- d[-1,]
>>>>>> dtf <- d[-1,]
>>>>>>
>>>>>>    for (i in 1:length(listaC)) {
>>>>>>
>>>>>>  #Relative freq of all data
>>>>>>  dcc<-listaC[[i]]
>>>>>>  dcc<-table(factor(dcc$z, levels=cab))
>>>>>>  dci<- rbind(dci, dcc)
>>>>>>  rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "c")
>>>>>>
>>>>>>
>>>>>>  #Relative freq of data with FDR<0.01
>>>>>>  dcc1<-listaC[[i]][ifelse(listaC[[i]]$FDR<0.01, TRUE, FALSE),]
>>>>>>  dcc1<-table(factor(dcc1$z, levels=cab))
>>>>>>  dcf<- rbind(dcf,dcc1)
>>>>>>  rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "c")
>>>>>>        }
>>>>>>
>>>>>> for (i in 1:length(listaT)) {
>>>>>>
>>>>>>  #Relative freq of all data
>>>>>>  dct<-listaT[[i]]
>>>>>>  dct<-table(factor(dct$z, levels=cab))
>>>>>>  dti<- rbind(dti, dct)
>>>>>>  rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "t")
>>>>>>
>>>>>>
>>>>>>  #Relative freq of data with FDR<0.01
>>>>>>  dct1<-listaT[[i]][ifelse(listaT[[i]]$FDR<0.01, TRUE, FALSE),]
>>>>>>  dct1<-table(factor(dct1$z, levels=cab))
>>>>>>  dtf<- rbind(dtf,dct1)
>>>>>>  rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "t")
>>>>>>        }
>>>>>>  freq.i<-rbind(dci,dti)
>>>>>>  freq.f<-rbind(dcf,dtf)
>>>>>>  freq.rel.i<-freq.i/apply(freq.i,1,sum)
>>>>>>  freq.rel.f<-freq.f/apply(freq.f,1,sum) 
>>>>>>
>>>>>>#Graph plot
>>>>>>colour<-sample(rainbow(nrow(freq.rel.i)))
>>>>>>par(mfrow=c(1,2))
>>>>>>barplot(freq.rel.i,beside=T,main=("Sample"),xlab="Charge",ylab="Relative 
>>>>>>Frequencies",col=colour,legend.text = rownames(freq.rel.i))
>>>>>>barplot(freq.rel.f,beside=T,main=("Sample with 
>>>>>>FDR<0.01"),xlab="Charge",ylab="Relative 
>>>>>>Frequencies",col=colour,legend.text = rownames(freq.rel.f))
>>>>>>#average of the group (except c1&t1)
>>>>>>freqs<-rbind(dcf[-1,], dtf[-1,])
>>>>>>average<-apply(freqs,2,mean)
>>>>>>
>>>>>>#chisquare test function
>>>>>>chisq.test<-function(x,y){
>>>>>> somax<-sum(x)
>>>>>> somay<-sum(y)
>>>>>> nj.<-x+y
>>>>>> nj<-sum(nj.)
>>>>>> ejx<-(nj./nj)*somax
>>>>>> ejy<-(nj./nj)*somay
>>>>>> ETx<-((x-ejx)^2)/ejx
>>>>>> ETy<-((y-ejy)^2)/ejy
>>>>>> ETobs<-sum(ETx)+sum(ETy)
>>>>>> pvalue<-1-pchisq(c(ETobs),df=length(x|y)-1,lower.tail=TRUE)
>>>>>> return(pvalue)
>>>>>> }
>>>>>>
>>>>>>#pvalues of the chisquare test between sample and average (H0: two 
>>>>>>samples has the same distribution)
>>>>>>pvalues<-c()
>>>>>>for (i in 1:(nrow(freqs))){
>>>>>>a<-chisq.test(freqs[i,],average)
>>>>>>pvalues<-c(pvalues,a)
>>>>>>}
>>>>>>#data frame with final p-values
>>>>>>dataframe<-data.frame(c(rownames(freqs)), c(pvalues))
>>>>>>colnames(dataframe)<-c("sample name","pvalue")
>>>>>>print(dataframe)
>>>>>>}
>>>>>>z.plot("C:/Users/Vera/Desktop/data",23)
>>>>>>
>>>>>>
>>>>>>
>>>>>>Thank you again
>>>>>>
>>>>>>
>>>>>>
>>>>>>2013/2/17 arun <smartpink...@yahoo.com>
>>>>>>
>>>>>>HI Vera,
>>>>>>>
>>>>>>>No problem.  I am cc:ing to r-help.
>>>>>>>
>>>>>>>A.K.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>________________________________
>>>>>>>From: Vera Costa <veracosta...@gmail.com>
>>>>>>>To: arun <smartpink...@yahoo.com>
>>>>>>>Sent: Sunday, February 17, 2013 5:44 AM
>>>>>>>Subject: Re: reading data
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>Hi. Thank you. It works now:-)
>>>>>>>And yes, I use windows.
>>>>>>>Thank you very much.
>>>>>>>No dia 17 de Fev de 2013 00:44, "arun" <smartpink...@yahoo.com> escreveu:
>>>>>>>
>>>>>>>Hi Vera,
>>>>>>>>
>>>>>>>>Have you tried the suggestion?
>>>>>>>>
>>>>>>>>Are you using Windows?
>>>>>>>>Thanks,
>>>>>>>>Arun
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>________________________________
>>>>>>>>From: Vera Costa <veracosta...@gmail.com>
>>>>>>>>To: arun <smartpink...@yahoo.com>
>>>>>>>>Sent: Saturday, February 16, 2013 7:10 PM
>>>>>>>>Subject: Re: reading data
>>>>>>>>
>>>>>>>>
>>>>>>>>Thank you.
>>>>>>>>In mine, I have an error " 'what' must be a character string or a 
>>>>>>>>function".
>>>>>>>>I need to do equivalent in my system.
>>>>>>>>Thank you and sorry one more time.
>>>>>>>>No dia 16 de Fev de 2013 23:53, "arun" <smartpink...@yahoo.com> 
>>>>>>>>escreveu:
>>>>>>>>
>>>>>>>>Hi,
>>>>>>>>>You didn't mention what the error message was, or whether you are reading 
>>>>>>>>>file names which are not "mmmmm11kk.txt".
>>>>>>>>>
>>>>>>>>>It is working on my system as I run it again.
>>>>>>>>>?c() combine values into a vector or list.
>>>>>>>>>
>>>>>>>>> sessionInfo()
>>>>>>>>>R version 2.15.1 (2012-06-22)
>>>>>>>>>Platform: x86_64-pc-linux-gnu (64-bit)
>>>>>>>>>
>>>>>>>>>locale:
>>>>>>>>> [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C             
>>>>>>>>> [3] LC_TIME=en_CA.UTF-8        LC_COLLATE=en_CA.UTF-8   
>>>>>>>>> [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8  
>>>>>>>>> [7] LC_PAPER=C                 LC_NAME=C                
>>>>>>>>> [9] LC_ADDRESS=C               LC_TELEPHONE=C           
>>>>>>>>>[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C      
>>>>>>>>>
>>>>>>>>>attached base packages:
>>>>>>>>>[1] stats     graphics  grDevices utils     datasets  methods   base   
>>>>>>>>> 
>>>>>>>>>
>>>>>>>>>other attached packages:
>>>>>>>>>[1] stringr_0.6.2  reshape2_1.2.2
>>>>>>>>>
>>>>>>>>>loaded via a namespace (and not attached):
>>>>>>>>>[1] plyr_1.8
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>#code
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x)
>>>>>>>>> {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) 
>>>>>>>>>read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))}))  #it 
>>>>>>>>>seems like one of the rows of your file doesn't have 6 elements, so 
>>>>>>>>>added fill=TRUE
>>>>>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="")
>>>>>>>>>res2<-split(res,names(res))
>>>>>>>>>res3<- lapply(res2,function(x) 
>>>>>>>>>{names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x})
>>>>>>>>>#result
>>>>>>>>>
>>>>>>>>>res3
>>>>>>>>>#$group_a
>>>>>>>>>#$group_a$a1
>>>>>>>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>
>>>>>>>>>$group_a$a2
>>>>>>>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>
>>>>>>>>>$group_a$a3
>>>>>>>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>$group_b
>>>>>>>>>$group_b$b1
>>>>>>>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>
>>>>>>>>>$group_b$b2
>>>>>>>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>$group_c
>>>>>>>>>$group_c$c1
>>>>>>>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>A.K.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>________________________________
>>>>>>>>>From: Vera Costa <veracosta...@gmail.com>
>>>>>>>>>To: arun <smartpink...@yahoo.com>
>>>>>>>>>Sent: Saturday, February 16, 2013 6:32 PM
>>>>>>>>>Subject: Re: reading data
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>Sorry again... In:
>>>>>>>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("...
>>>>>>>>>What is this c? In do.call(c,   When I put this row in R, I have an 
>>>>>>>>>error.
>>>>>>>>>Thank you
>>>>>>>>>No dia 15 de Fev de 2013 18:11, "arun" <smartpink...@yahoo.com> 
>>>>>>>>>escreveu:
>>>>>>>>>
>>>>>>>>>Hi,
>>>>>>>>>>No problem.
>>>>>>>>>>
>>>>>>>>>>BTW, these questions are not stupid..
>>>>>>>>>>Arun
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>________________________________
>>>>>>>>>>From: Vera Costa <veracosta...@gmail.com>
>>>>>>>>>>To: arun <smartpink...@yahoo.com>
>>>>>>>>>>Sent: Friday, February 15, 2013 1:08 PM
>>>>>>>>>>Subject: Re: reading data
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>Thank you very much.
>>>>>>>>>>
>>>>>>>>>>I will try to apply and after I tell you if it is ok :-)
>>>>>>>>>>
>>>>>>>>>>Thank you and sorry about these questions (sometimes stupid questions).
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>2013/2/15 arun <smartpink...@yahoo.com>
>>>>>>>>>>
>>>>>>>>>>HI,
>>>>>>>>>>>No problem.
>>>>>>>>>>>?c() for concatenate to vector or list().
>>>>>>>>>>>If I use do.call(cbind,..) or do.call(rbind,...)
>>>>>>>>>>>
>>>>>>>>>>>do.call(cbind,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x)
>>>>>>>>>>> {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) 
>>>>>>>>>>>read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})) 
>>>>>>>>>>>#   [,1]    [,2]    [,3]    [,4]    [,5]    [,6]  
>>>>>>>>>>>#a1 List,11 List,11 List,11 List,11 List,11 List,11
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> 
>>>>>>>>>>>do.call(rbind,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x)
>>>>>>>>>>> {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) 
>>>>>>>>>>>read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})) 
>>>>>>>>>>>#     a1    
>>>>>>>>>>>#[1,] List,11
>>>>>>>>>>>#[2,] List,11
>>>>>>>>>>>#[3,] List,11
>>>>>>>>>>>#[4,] List,11
>>>>>>>>>>>#[5,] List,11
>>>>>>>>>>>#[6,] List,11
>>>>>>>>>>>i.e.,
>>>>>>>>>>>a list within a list
>>>>>>>>>>>
>>>>>>>>>>> 
>>>>>>>>>>>restrial<-lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x)
>>>>>>>>>>> {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) 
>>>>>>>>>>>read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})
>>>>>>>>>>> str(restrial)
>>>>>>>>>>>#List of 6
>>>>>>>>>>># $ :List of 1
>>>>>>>>>>>  #..$ a1:'data.frame':    6 obs. of  11 variables:
>>>>>>>>>>>  #.. ..$ Id: chr [1:6] "aAA" "aAAAA" "aA" "aAA" ...
>>>>>>>>>>>  #.. ..$ M : chr [1:6] "1" "1" "2" "1" ...
>>>>>>>>>>>  #. ..$ mm: int [1:6] 2 2 1 2 3 2
>>>>>>>>>>>  #. ..$ x : int [1:6] 739 2263 1 1965 3660 1972
>>>>>>>>>>>  -----------------------------------------------------------------
>>>>>>>>>>>str(res)
>>>>>>>>>>>#List of 6
>>>>>>>>>>># $ a1:'data.frame':    6 obs. of  11 variables:
>>>>>>>>>>> # ..$ Id: chr [1:6] "aAA" "aAAAA" "aA" "aAA" ...
>>>>>>>>>>>  #..$ M : chr [1:6] "1" "1" "2" "1" ...
>>>>>>>>>>> # ..$ mm: int [1:6] 2 2 1 2 3 2
>>>>>>>>>>> # ..$ x : int [1:6] 739 2263 1 1965 3660 1972
>>>>>>>>>>>-----------------------------------------------------------------
>>>>>>>>>>>
>>>>>>>>>>>You mentioned naming these "group_a", "group_b", etc.
>>>>>>>>>>>
>>>>>>>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="")
>>>>>>>>>>>res2<-split(res,names(res))
>>>>>>>>>>>
>>>>>>>>>>>res3<- lapply(res2,function(x) 
>>>>>>>>>>>{names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x})
>>>>>>>>>>> res3$group_a
>>>>>>>>>>>$a1
>>>>>>>>>>>
>>>>>>>>>>>#     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>
>>>>>>>>>>>#$a2
>>>>>>>>>>>
>>>>>>>>>>>#     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>
>>>>>>>>>>>#$a3
>>>>>>>>>>>
>>>>>>>>>>> #    Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>A.K.
>>>>>>>>>>>
>>>>>>>>>>>________________________________
>>>>>>>>>>>From: Vera Costa <veracosta...@gmail.com>
>>>>>>>>>>>To: arun <smartpink...@yahoo.com>
>>>>>>>>>>>Sent: Friday, February 15, 2013 12:39 PM
>>>>>>>>>>>Subject: Re: reading data
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>Thank you very much and sorry my questions.
>>>>>>>>>>>
>>>>>>>>>>>But this code isn't grouping by letter, is it? I mean, a1,a2,a3 are 
>>>>>>>>>>>in the same group (the first letter gives me the name of the group)
>>>>>>>>>>>
>>>>>>>>>>>Another question, in do.call, you did do.call (c,.....) .What is c?
>>>>>>>>>>>
>>>>>>>>>>>Sorry
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>2013/2/15 arun <smartpink...@yahoo.com>
>>>>>>>>>>>
>>>>>>>>>>>HI,
>>>>>>>>>>>>
>>>>>>>>>>>>Just to add:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x)
>>>>>>>>>>>> {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) 
>>>>>>>>>>>>read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))}))  #it 
>>>>>>>>>>>>seems like one of the rows of your file doesn't have 6 elements, so 
>>>>>>>>>>>>added fill=TRUE
>>>>>>>>>>>>
>>>>>>>>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="")
>>>>>>>>>>>>res[grep("group_b",names(res))]
>>>>>>>>>>>>
>>>>>>>>>>>>I am not sure how you want the grouped data to look like.  If you 
>>>>>>>>>>>>want something like this:
>>>>>>>>>>>>res1<-do.call(rbind,res)
>>>>>>>>>>>>res2<-lapply(split(res1,gsub("[.0-9]","",row.names(res1))),function(x)
>>>>>>>>>>>> {row.names(x)<-1:nrow(x);x})
>>>>>>>>>>>>res2
>>>>>>>>>>>>#$group_a
>>>>>>>>>>>>
>>>>>>>>>>>> #     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>>>>#1    aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>>#2  aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>>#3     aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>>#4    aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>>#5   aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>>#6     AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>>#7    aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>>#8  aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>>#9     aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>>#10   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>>#11  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>>#12    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>>#13   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>>#14 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>>#15    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>>#16   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>>#17  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>>#18    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>#$group_b
>>>>>>>>>>>> #     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>>>>#1    aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>>#2  aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>>#3     aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>>#4    aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>>#5   aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>>#6     AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>>#7    aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>>#8  aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>>#9     aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>>#10   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>>#11  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>>#12    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>>
>>>>>>>>>>>>#$group_c
>>>>>>>>>>>>
>>>>>>>>>>>> #    Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>#or if you want it like this:
>>>>>>>>>>>>res2<-split(res,names(res))
>>>>>>>>>>>>
>>>>>>>>>>>>res2[["group_b"]]
>>>>>>>>>>>>
>>>>>>>>>>>>#$group_b
>>>>>>>>>>>>#     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>>
>>>>>>>>>>>>#$group_b
>>>>>>>>>>>> #    Id  M mm    x         b  u  k  j    y        p    v
>>>>>>>>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>>>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>>>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>>>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>>>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>>>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>>>>>>>
>>>>>>>>>>>>Hope this helps.
>>>>>>>>>>>>
>>>>>>>>>>>>A.K.
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>----- Original Message -----
>>>>>>>>>>>>From: "veracosta...@gmail.com" <veracosta...@gmail.com>
>>>>>>>>>>>>To: smartpink...@yahoo.com
>>>>>>>>>>>>Cc:
>>>>>>>>>>>>Sent: Friday, February 15, 2013 9:15 AM
>>>>>>>>>>>>Subject: reading data
>>>>>>>>>>>>
>>>>>>>>>>>>Hi,
>>>>>>>>>>>>I posted yesterday and you helped me. I have a little problem.
>>>>>>>>>>>>
>>>>>>>>>>>>At first, I never worked with regular expressions...
>>>>>>>>>>>>
>>>>>>>>>>>>The code that you gave me is ok, but my files are inside the 
>>>>>>>>>>>>folders a1,a2,a3. I try to explain better.
>>>>>>>>>>>>
>>>>>>>>>>>>I have one folder named "data". Inside this folder I have some 
>>>>>>>>>>>>other folders named "a1","a2","b1","b2",...and inside each one of 
>>>>>>>>>>>>them I have some files. I want only the file "mmmmmm.txt" (in all 
>>>>>>>>>>>>folders I have one file with this name).
>>>>>>>>>>>>The name of the folder gives me the name of the group, but I need to 
>>>>>>>>>>>>read the file inside. And after, have "group_a", 
>>>>>>>>>>>>"group_b"...because I need to work with this data grouped (and know 
>>>>>>>>>>>>the name of the group).
>>>>>>>>>>>>
>>>>>>>>>>>>Thank you.
>>>>>>>>>>>>   
>>>>>>>>>>>   
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>
>>>>>>>                                
>>>>>>
>>>>>        
>>>> 
>>>                                            
>> 
>                                    

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Re: [R] reading data

Reply via email to