I am not quite sure what you are trying to do because the phrase "mean fold change values" is not in my experience. I am guessing that it is referring to some sort of geometric mean or multiplicative model. The lengthy code looks very much like a rendition of a Fortran procedure rather than typical R code. I am wondering if you might get better help if you could try to express you needs in the language of general linear models, ofr at least be a bit more expansive in the description of the desired process rather than giving us a series of obscure, un-R- like, uncommented nested loops and asking us to fix them.

--
David Winsemius


On Apr 14, 2009, at 12:50 PM, Weber, Jeffrey D wrote:

I am new to R and have two scripts written slightly different but should to relatively the same thing but my lack of experience with the program I can not figure out the what I need to do to correct it. The first script gives me a consistent mean fold change values with every run but can generate negative p values for some. For the second version of the script, the fold changes seem to be very random but the p values do not go negative. I have highlighted the differences in the script that I noticed. I am think some combination of these two scripts will work but I am not sure.


Version 1

##t test

len_sig_test = length(sig_test)

if(len_sig_test%%2 > 0)

               message("Number of groups for t-test is not paired.")

if(max(sig_test) > group)

               message("Group number for t-test is out of range.")



data.norm.test = data.norm.zo                 #normalized data

nrow <- nrow(data.norm.test)

ncol <- ncol(data.norm.test)



p.ttest = rep(NA, nrow) #parametric test -- t test

p.wilcox = rep(NA, nrow) #non-parametric test -- Mann-Whitney test

fold.mean <- rep(NA, nrow)

mean.na <- rep(NA, nrow)

mean.nb <- rep(NA, nrow)



sig.peak.all = data.norm.test

g_1 = sig_test[1]

g_2 = sig_test[2]



col_s1 = 1; col_s2 = 1;

for(i in 1:group){

               if(i < g_1)

                               col_s1 = col_s1+group_size[i]

               if(i < g_2)

                               col_s2 = col_s2+group_size[i]

}

col_e1 = col_s1+group_size[g_1]-1

col_e2 = col_s2+group_size[g_2]-1



for(i in 1:nrow){

               na.int.norm <- rep(0, group_size[g_1])

               nb.int.norm <- rep(0, group_size[g_2])

               m = 1

               for(j in col_s1:col_e1){

                               na.int.norm[m] <- data.norm.test[i,j]

                               m <- m+1

               }

                n = 1

               for(j in col_s2:col_e2){

                               nb.int.norm[n] <- data.norm.test[i,j]

                               n <- n+1

               }

                m_na = mean(na.int.norm, na.rm=TRUE)

               m_nb = mean(nb.int.norm, na.rm=TRUE)

               mean.na[i] = exp(m_na)

               mean.nb[i] = exp(m_nb)

if(group_size[g_1]-sum(is.na(na.int.norm))>1 && group_size[g_2]-sum(is.na(nb.int.norm))>1){

                               fold.mean[i] = mean.na[i]/mean.nb[i]

p.ttest[i] = t.test(na.int.norm, nb.int.norm)$p.value

p.wilcox[i] = wilcox.test(na.int.norm, nb.int.norm)$p.value

               }

else if(!is.na(m_na) && !is.na(m_nb)) { #peak present in single sample

                               fold.mean[i] = mean.na[i]/mean.nb[i]

                               p.ttest[i] = -1

                               p.wilcox[i] =  -1

               }

               else{

if(is.na(m_na)) { #all data are NA in a group

fold.mean[i] = 0.1 + rnorm(1, mean=0, sd=0.04)

p.ttest[i] = p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002)

                               }else if(is.na(m_nb)){

fold.mean[i] = 10 + rnorm(1, mean=0, sd= 4)

p.ttest[i] = p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002)

}

}

}

sig.peak.t <- rep(0, nrow)

sig.peak.w <- rep(0, nrow)



n.ttest = n.mann = 0

for(i in 1:nrow){

               if(!is.na(p.ttest[i]) & p.ttest[i]<=0.05){

                               n.ttest <- n.ttest+1

                               sig.peak.t[i] = 1

               }

               if(!is.na(p.wilcox[i]) & p.wilcox[i]<=0.05){

                               n.mann <- n.mann+1

                               sig.peak.w[i] = 1

}

}

n.ttest; nrow; n.ttest/nrow*100

n.mann; nrow; n.mann/nrow*100



par(mfrow=c(1,1), oma=c(0,0,3,0), mar=c(5,4,4,2))          #t-test

plot(log(fold.mean), -log(p.ttest), xlab="fold change (log)", ylab="p (-log)", col="red")

points(log(fold.mean), -log(rep(0.05, nrow)), type='l', col="blue")

Version 1

##t test

len_sig_test = length(sig_test)

if(len_sig_test%%2 > 0)

               message("Number of groups for t-test is not paired.")

if(max(sig_test) > group)

               message("Group number for t-test is out of range.")



data.norm.test = data.norm.zo                 #normalized data

nrow <- nrow(data.norm.test)

ncol <- ncol(data.norm.test)



p.ttest = rep(NA, nrow) #parametric test -- t test

p.wilcox = rep(NA, nrow) #non-parametric test -- Mann-Whitney test

fold.mean <- rep(NA, nrow)

mean.na <- rep(NA, nrow)

mean.nb <- rep(NA, nrow)



sig.peak.all = data.norm.test

g_1 = sig_test[1]

g_2 = sig_test[2]



col_s1 = 1; col_s2 = 1;

for(i in 1:group){

               if(i < g_1)

                               col_s1 = col_s1+group_size[i]

               if(i < g_2)

                               col_s2 = col_s2+group_size[i]

}

col_e1 = col_s1+group_size[g_1]-1

col_e2 = col_s2+group_size[g_2]-1

for(i in 1:nrow){

               na.int.norm <- rep(0, group_size[g_1])

               nb.int.norm <- rep(0, group_size[g_2])

                m = 1

               for(j in col_s1:col_e1){

                               na.int.norm[m] <- data.norm.test[i,j]

                               m <- m+1

               }

                n = 1

               for(j in col_s2:col_e2){

                               nb.int.norm[n] <- data.norm.test[i,j]

                               n <- n+1

               }



               m_na = mean(na.int.norm, na.rm=TRUE)

               m_nb = mean(nb.int.norm, na.rm=TRUE)

               mean.na[i] = exp(m_na)

               mean.nb[i] = exp(m_nb)

if(group_size[g_1]-sum(is.na(na.int.norm))>1 && group_size[g_2]-sum(is.na(nb.int.norm))>1){

                               fold.mean[i] = mean.na[i]/mean.nb[i]

p.ttest[i] = t.test(na.int.norm, nb.int.norm)$p.value

p.wilcox[i] = wilcox.test(na.int.norm, nb.int.norm)$p.value

               }

else if(!is.na(m_na) && !is.na(m_nb)) { #peak present in single sample

                               fold.mean[i] = mean.na[i]/mean.nb[i]

                               p.ttest[i] = -1

                               p.wilcox[i] =  -1

               }

               else{

if(is.na(m_na)) { #all data are NA in a group

fold.mean[i] = 0.1 + rnorm(1, mean=0, sd=0.04)

p.ttest[i] = p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002)

                               }else if(is.na(m_nb)){

fold.mean[i] = 10 + rnorm(1, mean=0, sd= 4)

p.ttest[i] = p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002)

}

}

}

sig.peak.t <- rep(0, nrow)

sig.peak.w <- rep(0, nrow)



n.ttest = n.mann = 0

for(i in 1:nrow){

               if(!is.na(p.ttest[i]) & p.ttest[i]<=0.05){

                               n.ttest <- n.ttest+1

                               sig.peak.t[i] = 1

               }

               if(!is.na(p.wilcox[i]) & p.wilcox[i]<=0.05){

                               n.mann <- n.mann+1

                               sig.peak.w[i] = 1

}

}

n.ttest; nrow; n.ttest/nrow*100

n.mann; nrow; n.mann/nrow*100



par(mfrow=c(1,1), oma=c(0,0,3,0), mar=c(5,4,4,2))          #t-test

plot(log(fold.mean), -log(p.ttest), xlab="fold change (log)", ylab="p (-log)", col="red")

points(log(fold.mean), -log(rep(0.05, nrow)), type='l', col="blue")



Version 2

##t test

len_sig_test = length(sig_test)

if(len_sig_test%%2 > 0)

               message("Number of groups for t-test is not paired.")

if(max(sig_test) > group)

               message("Group number for t-test is out of range.")



data.norm.test = data.norm.zo                 #normalized data

nrow <- nrow(data.norm.test)

ncol <- ncol(data.norm.test)

for(w in 1:nrow){

               for(z in 1:ncol){

if(is.na(data.norm.test[w,z])){

}else if(exp(data.norm.test[w,z])==0) {data.norm.test[w,z]=NA

}

}

}

p.ttest = rep(NA, nrow) #parametric test -- t test

p.wilcox = rep(NA, nrow) #non-parametric test -- Mann-Whitney test

fold.mean <- rep(NA, nrow)

mean.na <- rep(NA, nrow)

mean.nb <- rep(NA, nrow)



sig.peak.all = data.norm.test

g_1 = sig_test[1]

g_2 = sig_test[2]



col_s1 = 1; col_s2 = 1;

for(i in 1:group){

               if(i < g_1)

                               col_s1 = col_s1+group_size[i]

               if(i < g_2)

                               col_s2 = col_s2+group_size[i]

}

col_e1 = col_s1+group_size[g_1]-1

col_e2 = col_s2+group_size[g_2]-1



for(i in 1:nrow){

               na.int.norm <- rep(0, group_size[g_1])

               nb.int.norm <- rep(0, group_size[g_2])

                m = 1

               x=0

               for(j in col_s1:col_e1){

                               na.int.norm[m] <- data.norm.test[i,j]

                               if(!is.na(na.int.norm[m])) x<-x+1

                               m <- m+1

               }

               n = 1

               y=0

               for(j in col_s2:col_e2){

                               nb.int.norm[n] <- data.norm.test[i,j]

                               if(!is.na(nb.int.norm[n])) y<-y+1

                               n <- n+1

               }



               m_na = mean(exp(na.int.norm), na.rm=TRUE)

               m_nb = mean(exp(nb.int.norm), na.rm=TRUE)

               mean.na[i] = m_na

               mean.nb[i] = m_nb

if(group_size[g_1]-sum(is.na(na.int.norm))>1 && group_size[g_2]-sum(is.na(nb.int.norm))>1&& x!=1 && y!=1 && x !=0 && y !=0){

                               fold.mean[i] = mean.na[i]/mean.nb[i]

p.ttest[i] = t.test(exp(na.int.norm), exp(nb.int.norm))$p.value

p.wilcox[i] = wilcox.test(exp(na.int.norm), exp(nb.int.norm))$p.value

               }else{

if(is.na(m_na)) { #all data are NA in a group

fold.mean[i] = max(0.1, (0.1 + rnorm(1, mean=0, sd=0.1)))

p.ttest[i] = p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0, sd=0.0001))

                                               }else if(is.na(m_nb)){

fold.mean[i] = 10 + rnorm(1, mean=0, sd=1)

p.ttest[i] = p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0, sd=0.0001))

}else if( x==1) fold.mean[i] = max(0.1, (0.1 + rnorm(1, mean=0, sd=0.1)))

p.ttest[i] = p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0, sd=0.0001))

                                               }else if(y==1){

fold.mean[i] = 10 + rnorm(1, mean=0, sd=1)

p.ttest[i] = p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0, sd=0.0001))

                                                               }

                               }

}

sig.peak.t <- rep(0, nrow)

sig.peak.w <- rep(0, nrow)



n.ttest = n.mann = 0

for(i in 1:nrow){

               if(!is.na(p.ttest[i]) & p.ttest[i]<=0.05){

                               n.ttest <- n.ttest+1

                               sig.peak.t[i] = 1

               }

               if(!is.na(p.wilcox[i]) & p.wilcox[i]<=0.05){

                               n.mann <- n.mann+1

                               sig.peak.w[i] = 1

               }

}

n.ttest; nrow; n.ttest/nrow*100

n.mann; nrow; n.mann/nrow*100

par(mfrow=c(1,1), oma=c(0,0,3,0), mar=c(5,4,4,2))          #t-test

plot(log(fold.mean), -log(p.ttest), xlab="fold change (log)", ylab="p (-log)", col="red")

points(log(fold.mean), -log(rep(0.05, nrow)), type='l', col="blue")


        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

David Winsemius, MD
Heritage Laboratories
West Hartford, CT

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to