At 10:28 19/08/2013, Laz wrote:
Dear R users,

I have written a couple of R functions, some are through the help of the R group members. However, running them takes days instead of minutes or a few hours. I am wondering whether there is a quick way of doing that.

Your example code is rather long for humans to profile. Have you thought of getting R to tell where it is spending most time? The R extensions manual tells you how to do this.


Here are all my R functions. The last one calls almost all of the previous functions. It is the one I am interested in most. It gives me the correct output but it takes several days to run only 1000 or 2000 simulations!
e.g. system.time(test1<-finalF(designs=5,swaps=20));test1
will take about 20 minutes to run but system.time(test1<-finalF(designs=5,swaps=50));test1 takes about 10 hours and system.time(test1<-finalF(designs=25,swaps=2000));test1 takes about 3 days to run

Here are my functions


#####################################################################

ls() # list all existing objects
rm(list = ls()) # remove them all
rm(list = ls()[!grepl("global.var.A", ls())])
# refresh memory
gc()
ls()

### Define a function that requires useful input from the user
#b=4;g=seq(1,20,1);rb=5;cb=4;s2e=1; r=10;c=8

#####################################
####################################
# function to calculate heritability
herit<-function(varG,varR=1)
{
  h<-4*varG/(varG+varR)
  return(c(heritability=h))
}

###################################
# function to calculate random error
varR<-function(varG,h2)
{
  varR<- varG*(4-h2)/h2
  return(c(random_error=varR))
}

##########################################
# function to calculate treatment variance
varG<-function(varR=1,h2)
{
  varG<-varR*h2/(4-h2)
  return(c(treatment_variance=varG))
}


###############################

# calculating R inverse from spatial data
rspat<-function(rhox=0.6,rhoy=0.6)
{
  s2e<-1
  R<-s2e*eye(N)
  for(i in 1:N) {
    for (j in i:N){
      y1<-y[i]
      y2<-y[j]
      x1<-x[i]
      x2<-x[j]
      R[i,j]<-s2e*(rhox^abs(x2-x1))*(rhoy^abs(y2-y1)) # Core AR(1)*AR(1)
      R[j,i]<-R[i,j]
    }
  }
  IR<-solve(R)
  IR
}

ped<<-read.table("ped2new.txt",header=FALSE)
# Now work on the pedigree
## A function to return Zinverse from pedigree

ZGped<-function(ped)
{
  ped2<-data.frame(ped)
lenp2<-length(unique(ped2$V1));lenp2 # how many Genotypes in total in the pedigree =40
  ln2<-length(g);ln2#ln2=nrow(matdf)=30
  # calculate the new Z
  Zped<-model.matrix(~ matdf$genotypes -1)# has order N*t = 180 by 30
  dif<-(lenp2-ln2);dif # 40-30=10
  #print(c(lenp2,ln2,dif))
  zeromatrix<-zeros(nrow(matdf),dif);zeromatrix # 180 by 10
Z<-cbind(zeromatrix,Zped) # Design Matrix for random effect (Genotypes): 180 by 40
  # calculate the new G
  M<-matrix(0,lenp2,lenp2) # 40 by 40
  for (i in 1:nrow(ped2)) { M[ped2[i, 1], ped2[i, 2]] <- ped2[i, 3]  }
  G<-s2g*M # Genetic Variance covariance matrix for pedigree 2: 40 by 40
  IG<-solve(G)
  return(list(IG=IG, Z=Z))
}

##########################
##    Required packages    #
############################
library(gmp)
library(knitr) # load this packages for publishing results
library(matlab)
library(Matrix)
library(psych)
library(foreach)
library(epicalc)
library(ggplot2)
library(xtable)
library(gdata)
library(gplots)

#b=6;g=seq(1,30,1);rb=5;cb=6;r=15;c=12;h2=0.3;rhox=0.6;rhoy=0.6;ped=0

setup<-function(b,g,rb,cb,r,c,h2,rhox=0.6,rhoy=0.6,ped="F")
  {
    # where
    # b   = number of blocks
    # t   = number of treatments per block
    # rb  = number of rows per block
    # cb  = number of columns per block
    # s2g = variance within genotypes
    # h2  = heritability
    # r   = total number of rows for the layout
    # c   = total number of columns for the layout

    ### Check points
    if(b==" ")
        stop(paste(sQuote("block")," cannot be missing"))
    if(!is.vector(g) | length(g)<3)
stop(paste(sQuote("treatments")," should be a vector and more than 2"))
    if(!is.numeric(b))
        stop(paste(sQuote("block"),"is not of class", sQuote("numeric")))
    if(length(b)>1)
        stop(paste(sQuote("block"),"has to be only 1 numeric value"))
    if(!is.whole(b))
        stop(paste(sQuote("block"),"has to be an", sQuote("integer")))

    ## Compatibility checks
    if(rb*cb !=length(g))
stop(paste(sQuote("rb x cb")," should be equal to number of treatment", sQuote("g")))
    if(length(g) != rb*cb)
stop(paste(sQuote("the number of treatments"), "is not equal to", sQuote("rb*cb")))

    ## Generate the design
    g<<-g
    genotypes<-times(b) %do% sample(g,length(g))
    #genotypes<-rep(g,b)
    block<-rep(1:b,each=length(g))
    genotypes<-factor(genotypes)
    block<-factor(block)

    ### generate the base design
    k<-c/cb # number of blocks on the x-axis
    x<<-rep(rep(1:r,each=cb),k)  # X-coordinate

    #w<-rb
    l<-cb
    p<-r/rb
    m<-l+1
    d<-l*b/p
    y<<-c(rep(1:l,r),rep(m:d,r)) # Y-coordinate

    ## compact
    matdf<<-data.frame(x,y,block,genotypes)
    N<<-nrow(matdf)
    mm<-summ(matdf)
    ss<-des(matdf)

    ## Identity matrices
    X<<-model.matrix(~block-1)
    h2<<-h2;rhox<<-rhox;rhoy<<-rhoy
    s2g<<-varG(varR=1,h2)
    ## calculate G and Z
ifelse(ped == "F", c(IG<<-(1/s2g)*eye(length(g)),Z<<-model.matrix(~matdf$genotypes-1)), c(IG<<- ZGped(ped)[[1]],Z<<-ZGped(ped)[[2]]))
    ## calculate R and IR
    s2e<-1
ifelse(rhox==0 | rhoy==0, IR<<-(1/s2e)*eye(N), IR<<-rspat(rhox=rhox,rhoy=rhoy))
    C11<-t(X)%*%IR%*%X
    C11inv<-solve(C11)
    K<<-IR%*%X%*%C11inv%*%t(X)%*%IR
      return(list(matdf=matdf,summary=mm,description=ss))

  }

#setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")[1]
#system.time(out3<-setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F"));out3
#system.time(out4<-setup(b=16,g=seq(1,196,1),rb=14,cb=14,r=56,c=56,h2=0.3,rhox=0.6,rhoy=0.6,ped="F"));out4


####################################################
# The function below uses shortcuts from  textbook by Harville 1997
# uses inverse of a partitioned matrix technique
####################################################

mainF<-function(criteria=c("A","D"))
{
  ### Variance covariance matrices
  temp<-t(Z)%*%IR%*%Z+IG - t(Z)%*%K%*%Z
  C22<-solve(temp)
  ##########################
  ##   Optimality Criteria
  #########################
  traceI<<-sum(diag(C22)) ## A-Optimality
doptimI<<-log(det(C22)) # D-Optimality: minimize the det of the inverse of Inform Matrix
  #return(c(traceI,doptimI))
      if(criteria=="A") return(traceI)
      if(criteria=="D") return(doptimI)
  else{return(c(traceI,doptimI))}
}

# system.time(res1<-mainF(criteria="A"));res1
# system.time(res2<-mainF(criteria="D"));res2
#system.time(res3<-mainF(criteria="both"));res3


##############################################
### Swap function that takes matdf and returns
## global values newnatdf and design matrices
###    Z and IG
##############################################

swapsimple<-function(matdf,ped="F")
{
  # dataset D =mat1 generated from the above function
  ## now, new design after swapping is
  matdf<-as.data.frame(matdf)
  attach(matdf,warn.conflict=FALSE)
  b1<-sample(matdf$block,1,replace=TRUE);b1
  gg1<-matdf$genotypes[block==b1];gg1
  g1<-sample(gg1,2);g1
  samp<-Matrix(c(g1=g1,block=b1),nrow=1,ncol=3,
               dimnames=list(NULL,c("gen1","gen2","block")));samp
  newGen<-matdf$genotypes
newG<-ifelse(matdf$genotypes==samp[,1] & block==samp[,3],samp[,2],matdf$genotypes)
  NewG<-ifelse(matdf$genotypes==samp[,2] & block==samp[,3],samp[,1],newG)
  NewG<-factor(NewG)

  ## now, new design after swapping is
  newmatdf<-cbind(matdf,NewG)
  newmatdf<<-as.data.frame(newmatdf)
  mm<-summ(newmatdf)
  ss<-des(newmatdf)

  ## Identity matrices
ifelse(ped == "F", c(IG<<-(1/s2g)*eye(length(g)),Z<<-model.matrix(~newmatdf$NewG-1)), c(IG<<- ZGped(ped)[[1]],Z<<-ZGped(ped)[[2]]))
  ## calculate R and IR
  C11<-t(X)%*%IR%*%X
  C11inv<-solve(C11)
  K<<-IR%*%X%*%C11inv%*%t(X)%*%IR
  return(list(newmatdf=newmatdf,summary=mm,description=ss))
}
#swapsimple(matdf,ped="F")[c(2,3)]
#which(newmatdf$genotypes != newmatdf$NewG)
###########################################
# for one design, swap pairs of treatments
# several times and store the traces
# of the successive swaps
##########################################

optmF<-function(iterations=2,verbose=FALSE)
{
  trace<-c()

  for (k in 1:iterations){
setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")
    swapsimple(matdf,ped="F")
    trace[k]<-mainF(criteria="A")
    iterations[k]<-k
    mat<-cbind(trace, iterations= seq(iterations))
   }

  if (verbose){
     cat("***starting matrix\n")
     print(mat)
   }
  # iterate till done
  while(nrow(mat) > 1){
    high <- diff(mat[, 'trace']) > 0
    if (!any(high)) break  # done
    # find which one to delete
    delete <- which.max(high) + 1L
    #mat <- mat[-delete, ]
    mat <- mat[-delete,, drop=FALSE]
  }
  mat
}

#system.time(test1<-optmF(iterations=10));test1

################################################
###############################################

swap<-function(matdf,ped="F",criteria=c("A","D"))
{
  # dataset D =mat1 generated from the above function
  ## now, new design after swapping is
  matdf<-as.data.frame(matdf)
  attach(matdf,warn.conflict=FALSE)
  b1<-sample(matdf$block,1,replace=TRUE);b1
  gg1<-matdf$genotypes[block==b1];gg1
  g1<-sample(gg1,2);g1
  samp<-Matrix(c(g1=g1,block=b1),nrow=1,ncol=3,
               dimnames=list(NULL,c("gen1","gen2","block")));samp
  newGen<-matdf$genotypes
newG<-ifelse(matdf$genotypes==samp[,1] & block==samp[,3],samp[,2],matdf$genotypes)
  NewG<-ifelse(matdf$genotypes==samp[,2] & block==samp[,3],samp[,1],newG)
  NewG<-factor(NewG)

  ## now, new design after swapping is
  newmatdf<-cbind(matdf,NewG)
  newmatdf<<-as.data.frame(newmatdf)
  mm<-summ(newmatdf)
  ss<-des(newmatdf)

  ## Identity matrices
  #X<<-model.matrix(~block-1)
  #s2g<<-varG(varR=1,h2)
  ## calculate G and Z
ifelse(ped == "F", c(IG<<-(1/s2g)*eye(length(g)),Z<<-model.matrix(~newmatdf$NewG-1)), c(IG<<- ZGped(ped)[[1]],Z<<-ZGped(ped)[[2]]))
  ## calculate R and IR
  C11<-t(X)%*%IR%*%X
  C11inv<-solve(C11)
  K<-IR%*%X%*%C11inv%*%t(X)%*%IR
  temp<-t(Z)%*%IR%*%Z+IG - t(Z)%*%K%*%Z
  C22<-solve(temp)
  ##########################
  ##   Optimality Criteria
  #########################
  traceI<-sum(diag(C22)) ## A-Optimality
  doptimI<-log(det(C22)) #
  #return(c(traceI,doptimI))
  if(criteria=="A") return(traceI)
  if(criteria=="D") return(doptimI)
  else{return(c(traceI,doptimI))}
}

#swap(matdf,ped="F",criteria="both")

###########################################
### Generate 25 initial designs
###########################################
#rspatf<-function(design){
#  arr = array(1, dim=c(nrow(matdf),ncol(matdf)+1,design))
#  l<-list(length=dim(arr)[3])
#  for (i in 1:dim(arr)[3]){
#    l[[i]]<-swapsimple(matdf,ped="F")[[1]][,,i]
#  }
#  l
#}
#matd<-rspatf(design=5)
#matd

#which(matd[[1]]$genotypes != matd[[1]]$NewG)
#which(matd[[2]]$genotypes != matd[[2]]$NewG)


###############################################
###############################################

optm<-function(iterations=2,verbose=FALSE)
{
  trace<-c()

  for (k in 1:iterations){
setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")
    trace[k]<-swap(matdf,ped="F",criteria="A")
    iterations[k]<-k
    mat<-cbind(trace, iterations= seq(iterations))
  }

  if (verbose){
    cat("***starting matrix\n")
    print(mat)
  }
  # iterate till done
  while(nrow(mat) > 1){
    high <- diff(mat[, 'trace']) > 0
    if (!any(high)) break  # done
    # find which one to delete
    delete <- which.max(high) + 1L
    #mat <- mat[-delete, ]
    mat <- mat[-delete,, drop=FALSE]
  }
  mat
}

#system.time(res<-optm(iterations=10));res
#################################################
################################################
finalF<-function(designs,swaps)
{
  Nmatdf<-list()
  OP<-list()
  Miny<-NULL
  Maxy<-NULL
  Minx<-NULL
  Maxx<-NULL
  for (i in 1:designs)
  {
setup(b=4,g=seq(1,20,1),rb=5,cb=4,r=10,c=8,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")[1]
    mainF(criteria="A")
    for (j in 1:swaps)
    {
      OP[[i]]<- optmF(iterations=swaps)
      Nmatdf[[i]]<-newmatdf[,5]
      Miny[i]<-min(OP[[i]][,1])
      Maxy[i]<-max(OP[[i]][,1])
      Minx[i]<-min(OP[[i]][,2])
      Maxx[i]<-max(OP[[i]][,2])
    }
  }
return(list(OP=OP,Miny=Miny,Maxy=Maxy,Minx=Minx,Maxx=Maxx,Nmatdf=Nmatdf)) # gives us both the Optimal conditions and designs
}

#################################################
sink(file= paste(format(Sys.time(), "Final_%a_%b_%d_%Y_%H_%M_%S"),"txt",sep="."),split=TRUE)
system.time(test1<-finalF(designs=25,swaps=2000));test1
sink()


I expect results like this below

sink()
finalF<-function(designs,swaps)
+{
+   Nmatdf<-list()
+   OP<-list()
+   Miny<-NULL
+   Maxy<-NULL
+   Minx<-NULL
+   Maxx<-NULL
+   for (i in 1:designs)
+   {
+ setup(b=4,g=seq(1,20,1),rb=5,cb=4,r=10,c=8,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")[1]
+     mainF(criteria="A")
+     for (j in 1:swaps)
+     {
+       OP[[i]]<- optmF(iterations=swaps)
+       Nmatdf[[i]]<-newmatdf[,5]
+       Miny[i]<-min(OP[[i]][,1])
+       Maxy[i]<-max(OP[[i]][,1])
+       Minx[i]<-min(OP[[i]][,2])
+       Maxx[i]<-max(OP[[i]][,2])
+     }
+   }
+ return(list(OP=OP,Miny=Miny,Maxy=Maxy,Minx=Minx,Maxx=Maxx,Nmatdf=Nmatdf)) # gives us both the Optimal conditions and designs
+}
sink(file= paste(format(Sys.time(), "Final_%a_%b_%d_%Y_%H_%M_%S"),"txt",sep="."),split=TRUE)
system.time(test1<-finalF(designs=5,swaps=5));test1
   user  system elapsed
  37.88    0.00   38.04
$OP
$OP[[1]]
         trace iterations
[1,] 0.8961335          1
[2,] 0.8952822          3
[3,] 0.8934649          4

$OP[[2]]
        trace iterations
[1,] 0.893955          1

$OP[[3]]
         trace iterations
[1,] 0.9007225          1
[2,] 0.8971837          4
[3,] 0.8902474          5

$OP[[4]]
         trace iterations
[1,] 0.8964726          1
[2,] 0.8951722          4

$OP[[5]]
         trace iterations
[1,] 0.8973285          1
[2,] 0.8922594          4


$Miny
[1] 0.8934649 0.8939550 0.8902474 0.8951722 0.8922594

$Maxy
[1] 0.8961335 0.8939550 0.9007225 0.8964726 0.8973285

$Minx
[1] 1 1 1 1 1

$Maxx
[1] 4 1 5 4 4

$Nmatdf
$Nmatdf[[1]]
[1] 30 8 5 28 27 29 1 26 24 22 13 6 17 18 2 19 14 11 3 23 10 15 21 9 25 4 7 20 12 16 14 17 15 5 8 6 19 [38] 4 1 10 11 3 24 20 13 2 27 12 16 28 21 23 30 25 29 7 26 18 9 22 24 21 26 2 13 30 5 28 20 11 3 7 18 25 [75] 22 16 4 17 19 27 29 10 23 6 12 15 14 1 9 8 12 11 3 8 5 20 23 22 7 15 19 29 24 27 13 2 6 1 21 26 25 [112] 10 16 14 18 4 30 17 9 28 29 9 7 27 11 2 30 18 8 14 19 20 15 21 4 3 16 24 13 28 26 10 12 6 5 25 1 17 [149] 23 22 21 2 23 16 4 10 9 22 30 24 1 27 3 20 12 5 26 17 28 11 7 14 8 25 19 13 18 29 15 6 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

$Nmatdf[[2]]
[1] 5 13 30 2 21 23 6 27 16 19 8 26 18 4 20 9 22 28 7 3 15 10 11 17 25 24 29 1 14 12 28 18 23 19 21 16 17 [38] 29 13 7 15 27 25 22 10 1 2 5 30 9 20 3 14 24 26 4 6 12 11 8 8 18 25 12 5 23 21 4 9 17 20 1 2 6 [75] 22 7 16 26 30 29 3 15 19 14 13 11 24 28 27 10 16 21 26 23 25 4 9 24 15 14 22 1 20 27 2 7 17 18 13 8 12 [112] 5 6 19 28 3 10 30 11 29 11 30 14 9 26 5 1 10 29 28 4 18 8 24 20 13 3 23 27 6 15 16 21 2 17 7 25 12 [149] 19 22 7 28 8 11 26 24 12 29 9 16 21 27 22 23 18 19 13 6 15 3 1 30 2 17 14 5 25 20 4 10 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

$Nmatdf[[3]]
[1] 7 25 4 30 12 11 14 13 26 1 10 21 15 22 29 19 27 16 2 24 28 20 3 5 23 8 18 6 17 9 6 21 9 15 11 17 13 [38] 29 24 4 20 7 23 14 2 16 18 26 19 25 8 1 12 10 28 27 22 30 5 3 20 12 8 2 11 18 24 19 9 22 15 7 30 27 [75] 17 29 6 3 5 1 21 25 28 14 23 4 16 26 13 10 20 29 26 25 15 22 9 10 28 17 18 21 6 16 7 1 3 24 11 2 4 [112] 14 8 5 13 27 23 30 19 12 6 30 1 2 7 28 18 8 20 10 4 25 14 19 27 11 13 29 12 9 3 26 22 21 16 15 17 24 [149] 5 23 17 6 25 11 21 29 5 26 13 7 15 2 9 4 18 30 3 8 20 24 27 22 19 16 28 12 1 23 14 10 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

$Nmatdf[[4]]
[1] 24 8 17 30 10 20 4 28 25 16 14 13 7 12 26 29 21 19 1 22 11 6 23 18 15 5 27 2 3 9 1 24 27 15 26 14 28 [38] 20 8 5 4 29 2 25 9 13 6 21 7 22 30 17 3 10 12 19 11 18 16 23 25 18 3 29 1 4 8 6 9 30 2 14 11 16 [75] 23 13 10 12 7 19 17 5 21 28 24 20 15 27 26 22 14 5 7 6 17 3 1 29 25 23 19 11 21 18 4 30 20 8 2 12 9 [112] 16 10 15 27 26 13 24 28 22 19 7 17 1 12 8 18 16 14 22 3 28 27 25 10 6 4 15 30 9 11 5 20 26 24 29 21 2 [149] 23 13 2 16 10 25 18 15 26 22 12 19 30 17 23 8 3 7 20 14 13 28 9 21 11 29 6 5 4 24 27 1 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

$Nmatdf[[5]]
[1] 12 18 8 22 9 21 2 1 29 13 30 25 17 6 16 5 26 7 3 14 23 15 28 27 10 24 20 11 19 4 20 30 14 27 25 4 6 [38] 28 23 8 9 29 26 19 24 7 5 1 11 22 21 2 10 18 12 15 3 17 13 16 16 22 6 9 21 5 14 2 30 10 3 25 27 15 [75] 28 7 17 20 11 8 19 29 12 26 24 13 1 4 18 23 4 16 10 25 5 13 18 19 22 7 28 30 23 21 11 2 14 9 20 24 8 [112] 17 1 15 29 6 12 27 3 26 14 8 26 6 20 9 15 23 3 22 7 30 25 24 1 10 19 21 4 11 2 18 17 13 28 29 27 16 [149] 12 5 19 2 4 5 15 21 17 7 25 8 6 16 20 29 10 18 1 12 26 28 27 11 14 23 22 9 3 13 30 24 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30



Michael Dewey
i...@aghmed.fsnet.co.uk
http://www.aghmed.fsnet.co.uk/home.html

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to