I am new to R and have two scripts written slightly different but should to relatively the same thing but my lack of experience with the program I can not figure out the what I need to do to correct it. The first script gives me a consistent mean fold change values with every run but can generate negative p values for some. For the second version of the script, the fold changes seem to be very random but the p values do not go negative. I have highlighted the differences in the script that I noticed. I am think some combination of these two scripts will work but I am not sure.
Version 1 ##t test len_sig_test = length(sig_test) if(len_sig_test%%2 > 0) message("Number of groups for t-test is not paired.") if(max(sig_test) > group) message("Group number for t-test is out of range.") data.norm.test = data.norm.zo #normalized data nrow <- nrow(data.norm.test) ncol <- ncol(data.norm.test) p.ttest = rep(NA, nrow) #parametric test -- t test p.wilcox = rep(NA, nrow) #non-parametric test -- Mann-Whitney test fold.mean <- rep(NA, nrow) mean.na <- rep(NA, nrow) mean.nb <- rep(NA, nrow) sig.peak.all = data.norm.test g_1 = sig_test[1] g_2 = sig_test[2] col_s1 = 1; col_s2 = 1; for(i in 1:group){ if(i < g_1) col_s1 = col_s1+group_size[i] if(i < g_2) col_s2 = col_s2+group_size[i] } col_e1 = col_s1+group_size[g_1]-1 col_e2 = col_s2+group_size[g_2]-1 for(i in 1:nrow){ na.int.norm <- rep(0, group_size[g_1]) nb.int.norm <- rep(0, group_size[g_2]) m = 1 for(j in col_s1:col_e1){ na.int.norm[m] <- data.norm.test[i,j] m <- m+1 } n = 1 for(j in col_s2:col_e2){ nb.int.norm[n] <- data.norm.test[i,j] n <- n+1 } m_na = mean(na.int.norm, na.rm=TRUE) m_nb = mean(nb.int.norm, na.rm=TRUE) mean.na[i] = exp(m_na) mean.nb[i] = exp(m_nb) if(group_size[g_1]-sum(is.na(na.int.norm))>1 && group_size[g_2]-sum(is.na(nb.int.norm))>1){ fold.mean[i] = mean.na[i]/mean.nb[i] p.ttest[i] = t.test(na.int.norm, nb.int.norm)$p.value p.wilcox[i] = wilcox.test(na.int.norm, nb.int.norm)$p.value } else if(!is.na(m_na) && !is.na(m_nb)){ #peak present in single sample fold.mean[i] = mean.na[i]/mean.nb[i] p.ttest[i] = -1 p.wilcox[i] = -1 } else{ if(is.na(m_na)){ #all data are NA in a group fold.mean[i] = 0.1 + rnorm(1, mean=0, sd=0.04) p.ttest[i] = p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002) }else if(is.na(m_nb)){ fold.mean[i] = 10 + rnorm(1, mean=0, sd= 4) p.ttest[i] = p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002) } } } sig.peak.t <- rep(0, nrow) sig.peak.w <- rep(0, nrow) n.ttest = n.mann = 0 for(i in 1:nrow){ if(!is.na(p.ttest[i]) & p.ttest[i]<=0.05){ n.ttest <- n.ttest+1 sig.peak.t[i] = 1 } if(!is.na(p.wilcox[i]) & p.wilcox[i]<=0.05){ n.mann <- n.mann+1 sig.peak.w[i] = 1 } } n.ttest; nrow; n.ttest/nrow*100 n.mann; nrow; n.mann/nrow*100 par(mfrow=c(1,1), oma=c(0,0,3,0), mar=c(5,4,4,2)) #t-test plot(log(fold.mean), -log(p.ttest), xlab="fold change (log)", ylab="p (-log)", col="red") points(log(fold.mean), -log(rep(0.05, nrow)), type='l', col="blue") Version 1 ##t test len_sig_test = length(sig_test) if(len_sig_test%%2 > 0) message("Number of groups for t-test is not paired.") if(max(sig_test) > group) message("Group number for t-test is out of range.") data.norm.test = data.norm.zo #normalized data nrow <- nrow(data.norm.test) ncol <- ncol(data.norm.test) p.ttest = rep(NA, nrow) #parametric test -- t test p.wilcox = rep(NA, nrow) #non-parametric test -- Mann-Whitney test fold.mean <- rep(NA, nrow) mean.na <- rep(NA, nrow) mean.nb <- rep(NA, nrow) sig.peak.all = data.norm.test g_1 = sig_test[1] g_2 = sig_test[2] col_s1 = 1; col_s2 = 1; for(i in 1:group){ if(i < g_1) col_s1 = col_s1+group_size[i] if(i < g_2) col_s2 = col_s2+group_size[i] } col_e1 = col_s1+group_size[g_1]-1 col_e2 = col_s2+group_size[g_2]-1 for(i in 1:nrow){ na.int.norm <- rep(0, group_size[g_1]) nb.int.norm <- rep(0, group_size[g_2]) m = 1 for(j in col_s1:col_e1){ na.int.norm[m] <- data.norm.test[i,j] m <- m+1 } n = 1 for(j in col_s2:col_e2){ nb.int.norm[n] <- data.norm.test[i,j] n <- n+1 } m_na = mean(na.int.norm, na.rm=TRUE) m_nb = mean(nb.int.norm, na.rm=TRUE) mean.na[i] = exp(m_na) mean.nb[i] = exp(m_nb) if(group_size[g_1]-sum(is.na(na.int.norm))>1 && group_size[g_2]-sum(is.na(nb.int.norm))>1){ fold.mean[i] = mean.na[i]/mean.nb[i] p.ttest[i] = t.test(na.int.norm, nb.int.norm)$p.value p.wilcox[i] = wilcox.test(na.int.norm, nb.int.norm)$p.value } else if(!is.na(m_na) && !is.na(m_nb)){ #peak present in single sample fold.mean[i] = mean.na[i]/mean.nb[i] p.ttest[i] = -1 p.wilcox[i] = -1 } else{ if(is.na(m_na)){ #all data are NA in a group fold.mean[i] = 0.1 + rnorm(1, mean=0, sd=0.04) p.ttest[i] = p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002) }else if(is.na(m_nb)){ fold.mean[i] = 10 + rnorm(1, mean=0, sd= 4) p.ttest[i] = p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002) } } } sig.peak.t <- rep(0, nrow) sig.peak.w <- rep(0, nrow) n.ttest = n.mann = 0 for(i in 1:nrow){ if(!is.na(p.ttest[i]) & p.ttest[i]<=0.05){ n.ttest <- n.ttest+1 sig.peak.t[i] = 1 } if(!is.na(p.wilcox[i]) & p.wilcox[i]<=0.05){ n.mann <- n.mann+1 sig.peak.w[i] = 1 } } n.ttest; nrow; n.ttest/nrow*100 n.mann; nrow; n.mann/nrow*100 par(mfrow=c(1,1), oma=c(0,0,3,0), mar=c(5,4,4,2)) #t-test plot(log(fold.mean), -log(p.ttest), xlab="fold change (log)", ylab="p (-log)", col="red") points(log(fold.mean), -log(rep(0.05, nrow)), type='l', col="blue") Version 2 ##t test len_sig_test = length(sig_test) if(len_sig_test%%2 > 0) message("Number of groups for t-test is not paired.") if(max(sig_test) > group) message("Group number for t-test is out of range.") data.norm.test = data.norm.zo #normalized data nrow <- nrow(data.norm.test) ncol <- ncol(data.norm.test) for(w in 1:nrow){ for(z in 1:ncol){ if(is.na(data.norm.test[w,z])){ }else if(exp(data.norm.test[w,z])==0) {data.norm.test[w,z]=NA } } } p.ttest = rep(NA, nrow) #parametric test -- t test p.wilcox = rep(NA, nrow) #non-parametric test -- Mann-Whitney test fold.mean <- rep(NA, nrow) mean.na <- rep(NA, nrow) mean.nb <- rep(NA, nrow) sig.peak.all = data.norm.test g_1 = sig_test[1] g_2 = sig_test[2] col_s1 = 1; col_s2 = 1; for(i in 1:group){ if(i < g_1) col_s1 = col_s1+group_size[i] if(i < g_2) col_s2 = col_s2+group_size[i] } col_e1 = col_s1+group_size[g_1]-1 col_e2 = col_s2+group_size[g_2]-1 for(i in 1:nrow){ na.int.norm <- rep(0, group_size[g_1]) nb.int.norm <- rep(0, group_size[g_2]) m = 1 x=0 for(j in col_s1:col_e1){ na.int.norm[m] <- data.norm.test[i,j] if(!is.na(na.int.norm[m])) x<-x+1 m <- m+1 } n = 1 y=0 for(j in col_s2:col_e2){ nb.int.norm[n] <- data.norm.test[i,j] if(!is.na(nb.int.norm[n])) y<-y+1 n <- n+1 } m_na = mean(exp(na.int.norm), na.rm=TRUE) m_nb = mean(exp(nb.int.norm), na.rm=TRUE) mean.na[i] = m_na mean.nb[i] = m_nb if(group_size[g_1]-sum(is.na(na.int.norm))>1 && group_size[g_2]-sum(is.na(nb.int.norm))>1&& x!=1 && y!=1 && x !=0 && y !=0){ fold.mean[i] = mean.na[i]/mean.nb[i] p.ttest[i] = t.test(exp(na.int.norm), exp(nb.int.norm))$p.value p.wilcox[i] = wilcox.test(exp(na.int.norm), exp(nb.int.norm))$p.value }else{ if(is.na(m_na)){ #all data are NA in a group fold.mean[i] = max(0.1, (0.1 + rnorm(1, mean=0, sd=0.1))) p.ttest[i] = p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0, sd=0.0001)) }else if(is.na(m_nb)){ fold.mean[i] = 10 + rnorm(1, mean=0, sd=1) p.ttest[i] = p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0, sd=0.0001)) }else if( x==1) fold.mean[i] = max(0.1, (0.1 + rnorm(1, mean=0, sd=0.1))) p.ttest[i] = p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0, sd=0.0001)) }else if(y==1){ fold.mean[i] = 10 + rnorm(1, mean=0, sd=1) p.ttest[i] = p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0, sd=0.0001)) } } } sig.peak.t <- rep(0, nrow) sig.peak.w <- rep(0, nrow) n.ttest = n.mann = 0 for(i in 1:nrow){ if(!is.na(p.ttest[i]) & p.ttest[i]<=0.05){ n.ttest <- n.ttest+1 sig.peak.t[i] = 1 } if(!is.na(p.wilcox[i]) & p.wilcox[i]<=0.05){ n.mann <- n.mann+1 sig.peak.w[i] = 1 } } n.ttest; nrow; n.ttest/nrow*100 n.mann; nrow; n.mann/nrow*100 par(mfrow=c(1,1), oma=c(0,0,3,0), mar=c(5,4,4,2)) #t-test plot(log(fold.mean), -log(p.ttest), xlab="fold change (log)", ylab="p (-log)", col="red") points(log(fold.mean), -log(rep(0.05, nrow)), type='l', col="blue") [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.