I am new to R and have two scripts written slightly different but
should to relatively the same thing but my lack of experience with
the program I can not figure out the what I need to do to correct
it. The first script gives me a consistent mean fold change values
with every run but can generate negative p values for some. For the
second version of the script, the fold changes seem to be very
random but the p values do not go negative. I have highlighted the
differences in the script that I noticed. I am think some
combination of these two scripts will work but I am not sure.
Version 1
##t test
len_sig_test = length(sig_test)
if(len_sig_test%%2 > 0)
message("Number of groups for t-test is not paired.")
if(max(sig_test) > group)
message("Group number for t-test is out of range.")
data.norm.test = data.norm.zo #normalized data
nrow <- nrow(data.norm.test)
ncol <- ncol(data.norm.test)
p.ttest = rep(NA,
nrow) #parametric
test -- t test
p.wilcox = rep(NA, nrow) #non-parametric
test -- Mann-Whitney test
fold.mean <- rep(NA, nrow)
mean.na <- rep(NA, nrow)
mean.nb <- rep(NA, nrow)
sig.peak.all = data.norm.test
g_1 = sig_test[1]
g_2 = sig_test[2]
col_s1 = 1; col_s2 = 1;
for(i in 1:group){
if(i < g_1)
col_s1 = col_s1+group_size[i]
if(i < g_2)
col_s2 = col_s2+group_size[i]
}
col_e1 = col_s1+group_size[g_1]-1
col_e2 = col_s2+group_size[g_2]-1
for(i in 1:nrow){
na.int.norm <- rep(0, group_size[g_1])
nb.int.norm <- rep(0, group_size[g_2])
m = 1
for(j in col_s1:col_e1){
na.int.norm[m] <- data.norm.test[i,j]
m <- m+1
}
n = 1
for(j in col_s2:col_e2){
nb.int.norm[n] <- data.norm.test[i,j]
n <- n+1
}
m_na = mean(na.int.norm, na.rm=TRUE)
m_nb = mean(nb.int.norm, na.rm=TRUE)
mean.na[i] = exp(m_na)
mean.nb[i] = exp(m_nb)
if(group_size[g_1]-sum(is.na(na.int.norm))>1 &&
group_size[g_2]-sum(is.na(nb.int.norm))>1){
fold.mean[i] = mean.na[i]/mean.nb[i]
p.ttest[i] = t.test(na.int.norm,
nb.int.norm)$p.value
p.wilcox[i] =
wilcox.test(na.int.norm, nb.int.norm)$p.value
}
else if(!is.na(m_na) && !is.na(m_nb))
{ #peak present in single sample
fold.mean[i] = mean.na[i]/mean.nb[i]
p.ttest[i] = -1
p.wilcox[i] = -1
}
else{
if(is.na(m_na))
{ #all data are NA in
a group
fold.mean[i] = 0.1 +
rnorm(1, mean=0, sd=0.04)
p.ttest[i] =
p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002)
}else if(is.na(m_nb)){
fold.mean[i] = 10 +
rnorm(1, mean=0, sd= 4)
p.ttest[i] =
p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002)
}
}
}
sig.peak.t <- rep(0, nrow)
sig.peak.w <- rep(0, nrow)
n.ttest = n.mann = 0
for(i in 1:nrow){
if(!is.na(p.ttest[i]) & p.ttest[i]<=0.05){
n.ttest <- n.ttest+1
sig.peak.t[i] = 1
}
if(!is.na(p.wilcox[i]) & p.wilcox[i]<=0.05){
n.mann <- n.mann+1
sig.peak.w[i] = 1
}
}
n.ttest; nrow; n.ttest/nrow*100
n.mann; nrow; n.mann/nrow*100
par(mfrow=c(1,1), oma=c(0,0,3,0), mar=c(5,4,4,2)) #t-test
plot(log(fold.mean), -log(p.ttest), xlab="fold change (log)",
ylab="p (-log)", col="red")
points(log(fold.mean), -log(rep(0.05, nrow)), type='l', col="blue")
Version 1
##t test
len_sig_test = length(sig_test)
if(len_sig_test%%2 > 0)
message("Number of groups for t-test is not paired.")
if(max(sig_test) > group)
message("Group number for t-test is out of range.")
data.norm.test = data.norm.zo #normalized data
nrow <- nrow(data.norm.test)
ncol <- ncol(data.norm.test)
p.ttest = rep(NA,
nrow) #parametric
test -- t test
p.wilcox = rep(NA, nrow) #non-parametric
test -- Mann-Whitney test
fold.mean <- rep(NA, nrow)
mean.na <- rep(NA, nrow)
mean.nb <- rep(NA, nrow)
sig.peak.all = data.norm.test
g_1 = sig_test[1]
g_2 = sig_test[2]
col_s1 = 1; col_s2 = 1;
for(i in 1:group){
if(i < g_1)
col_s1 = col_s1+group_size[i]
if(i < g_2)
col_s2 = col_s2+group_size[i]
}
col_e1 = col_s1+group_size[g_1]-1
col_e2 = col_s2+group_size[g_2]-1
for(i in 1:nrow){
na.int.norm <- rep(0, group_size[g_1])
nb.int.norm <- rep(0, group_size[g_2])
m = 1
for(j in col_s1:col_e1){
na.int.norm[m] <- data.norm.test[i,j]
m <- m+1
}
n = 1
for(j in col_s2:col_e2){
nb.int.norm[n] <- data.norm.test[i,j]
n <- n+1
}
m_na = mean(na.int.norm, na.rm=TRUE)
m_nb = mean(nb.int.norm, na.rm=TRUE)
mean.na[i] = exp(m_na)
mean.nb[i] = exp(m_nb)
if(group_size[g_1]-sum(is.na(na.int.norm))>1 &&
group_size[g_2]-sum(is.na(nb.int.norm))>1){
fold.mean[i] = mean.na[i]/mean.nb[i]
p.ttest[i] = t.test(na.int.norm,
nb.int.norm)$p.value
p.wilcox[i] =
wilcox.test(na.int.norm, nb.int.norm)$p.value
}
else if(!is.na(m_na) && !is.na(m_nb))
{ #peak present in single sample
fold.mean[i] = mean.na[i]/mean.nb[i]
p.ttest[i] = -1
p.wilcox[i] = -1
}
else{
if(is.na(m_na))
{ #all data are NA in
a group
fold.mean[i] = 0.1 +
rnorm(1, mean=0, sd=0.04)
p.ttest[i] =
p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002)
}else if(is.na(m_nb)){
fold.mean[i] = 10 +
rnorm(1, mean=0, sd= 4)
p.ttest[i] =
p.wilcox[i] = 0.001 + rnorm(1, mean=0, sd=0.002)
}
}
}
sig.peak.t <- rep(0, nrow)
sig.peak.w <- rep(0, nrow)
n.ttest = n.mann = 0
for(i in 1:nrow){
if(!is.na(p.ttest[i]) & p.ttest[i]<=0.05){
n.ttest <- n.ttest+1
sig.peak.t[i] = 1
}
if(!is.na(p.wilcox[i]) & p.wilcox[i]<=0.05){
n.mann <- n.mann+1
sig.peak.w[i] = 1
}
}
n.ttest; nrow; n.ttest/nrow*100
n.mann; nrow; n.mann/nrow*100
par(mfrow=c(1,1), oma=c(0,0,3,0), mar=c(5,4,4,2)) #t-test
plot(log(fold.mean), -log(p.ttest), xlab="fold change (log)",
ylab="p (-log)", col="red")
points(log(fold.mean), -log(rep(0.05, nrow)), type='l', col="blue")
Version 2
##t test
len_sig_test = length(sig_test)
if(len_sig_test%%2 > 0)
message("Number of groups for t-test is not paired.")
if(max(sig_test) > group)
message("Group number for t-test is out of range.")
data.norm.test = data.norm.zo #normalized data
nrow <- nrow(data.norm.test)
ncol <- ncol(data.norm.test)
for(w in 1:nrow){
for(z in 1:ncol){
if(is.na(data.norm.test[w,z])){
}else
if(exp(data.norm.test[w,z])==0) {data.norm.test[w,z]=NA
}
}
}
p.ttest = rep(NA,
nrow) #parametric
test -- t test
p.wilcox = rep(NA, nrow) #non-parametric
test -- Mann-Whitney test
fold.mean <- rep(NA, nrow)
mean.na <- rep(NA, nrow)
mean.nb <- rep(NA, nrow)
sig.peak.all = data.norm.test
g_1 = sig_test[1]
g_2 = sig_test[2]
col_s1 = 1; col_s2 = 1;
for(i in 1:group){
if(i < g_1)
col_s1 = col_s1+group_size[i]
if(i < g_2)
col_s2 = col_s2+group_size[i]
}
col_e1 = col_s1+group_size[g_1]-1
col_e2 = col_s2+group_size[g_2]-1
for(i in 1:nrow){
na.int.norm <- rep(0, group_size[g_1])
nb.int.norm <- rep(0, group_size[g_2])
m = 1
x=0
for(j in col_s1:col_e1){
na.int.norm[m] <- data.norm.test[i,j]
if(!is.na(na.int.norm[m])) x<-x+1
m <- m+1
}
n = 1
y=0
for(j in col_s2:col_e2){
nb.int.norm[n] <- data.norm.test[i,j]
if(!is.na(nb.int.norm[n])) y<-y+1
n <- n+1
}
m_na = mean(exp(na.int.norm), na.rm=TRUE)
m_nb = mean(exp(nb.int.norm), na.rm=TRUE)
mean.na[i] = m_na
mean.nb[i] = m_nb
if(group_size[g_1]-sum(is.na(na.int.norm))>1 &&
group_size[g_2]-sum(is.na(nb.int.norm))>1&& x!=1 && y!=1 && x !=0 &&
y !=0){
fold.mean[i] = mean.na[i]/mean.nb[i]
p.ttest[i] = t.test(exp(na.int.norm),
exp(nb.int.norm))$p.value
p.wilcox[i] =
wilcox.test(exp(na.int.norm), exp(nb.int.norm))$p.value
}else{
if(is.na(m_na))
{ #all data are NA in
a group
fold.mean[i] =
max(0.1, (0.1 + rnorm(1, mean=0, sd=0.1)))
p.ttest[i] =
p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0, sd=0.0001))
}else if(is.na(m_nb)){
fold.mean[i] = 10 +
rnorm(1, mean=0, sd=1)
p.ttest[i] = p.wilcox[i]
= 0.0001 + abs(rnorm(1, mean=0, sd=0.0001))
}else if( x==1)
fold.mean[i] = max(0.1, (0.1 + rnorm(1, mean=0, sd=0.1)))
p.ttest[i] = p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0,
sd=0.0001))
}else if(y==1){
fold.mean[i] = 10 + rnorm(1, mean=0, sd=1)
p.ttest[i] = p.wilcox[i] = 0.0001 + abs(rnorm(1, mean=0, sd=0.0001))
}
}
}
sig.peak.t <- rep(0, nrow)
sig.peak.w <- rep(0, nrow)
n.ttest = n.mann = 0
for(i in 1:nrow){
if(!is.na(p.ttest[i]) & p.ttest[i]<=0.05){
n.ttest <- n.ttest+1
sig.peak.t[i] = 1
}
if(!is.na(p.wilcox[i]) & p.wilcox[i]<=0.05){
n.mann <- n.mann+1
sig.peak.w[i] = 1
}
}
n.ttest; nrow; n.ttest/nrow*100
n.mann; nrow; n.mann/nrow*100
par(mfrow=c(1,1), oma=c(0,0,3,0), mar=c(5,4,4,2)) #t-test
plot(log(fold.mean), -log(p.ttest), xlab="fold change (log)",
ylab="p (-log)", col="red")
points(log(fold.mean), -log(rep(0.05, nrow)), type='l', col="blue")
[[alternative HTML version deleted]]
______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.