R Users: I am trying to estimate a model of fertility behaviour using birth history data with maximum likelihood. My code works but is extremely slow (because of several for loops and my programming inefficiencies); when I use the genetic algorithm to optimize the likelihood function, it takes several days to complete (on a machine with Intel Core 2 processor [2.66GHz] and 2.99 GB RAM). Computing the hessian and using it to calculate the standard errors takes a large chunk of this time.
I am copying the code for my likelihood function below; it would be great if someone could suggest any method to speed up the code (by avoiding the for loops or by any other method). I am not providing details of my model or what exactly I am trying to do in each step of the computation below; i would be happy to provide these details if they are deemed necessary for re-working the code. Thanks. Deepankar --------- begin code ----------------------- LLK1 <- function(paramets, data.frame, ...) { # DEFINING THE LOGLIKELIHOOD FUNCTION # paramets IS A 1x27 VECTOR OF PARAMETERS OVER WHICH THE FUNCTION WILL BE MAXIMISED # data.frame IS A DATA FRAME. THE DATA FRAME CONTAINS OBSERVATIONS ON SEVERAL VARIABLES # (LIKE EDUCATION, AGE, ETC.) FOR EACH RESPONDENT. COLUMNS REFER TO VARIABLES AND ROWS REFER # TO OBSERVATIONS. ########## PARAMETERS ############################### # alpha: interaction between son targeting and family size # beta : son targeting # gamma : family size # delta : a 1x6 vector of probabilities of male birth at various parities (q1, q2, q3, q4, q5, q6) # zeta : a 1x11 vector of conditional probabilities with zeta[1]=1 always alpha <- paramets[1] # FIRST PARAMETER beta <- paramets[2:9] # SECOND TO SEVENTH PARAMETER gamma <- paramets[10:16] delta <- paramets[17] zeta <- paramets[18:27] # LAST 10 PARAMETERS ################# VARIABLES ############################### # READING IN THE VARIABLES IN THE DATA FRAME # AND RENAMING THEM FOR USE IN THE LIKELIHOOD FUNCTION everborn <- data.frame$v201 alive <- data.frame$alive age <- data.frame$age edu <- data.frame$edu rural <- data.frame$rur rich <- data.frame$rich middle <- data.frame$middle poor <- data.frame$poor work <- data.frame$work jointfam <- data.frame$jfam contracep <- data.frame$contra hindu <- data.frame$hindu muslim <- data.frame$muslim scaste <- data.frame$scaste stribe <- data.frame$stribe obc <- data.frame$obc ucaste <- data.frame$ucaste N <- data.frame$dfsize indN <- data.frame$dfsize1 # INDICATOR FUNCTION THAT dfsize==alive nb <- data.frame$nboy ng <- data.frame$ngirl ncord1 <- data.frame$ncord1 # FIRST CHILD: BOY=0; GIRL=1 ncord2 <- data.frame$ncord2 #SECOND CHILD: BOY=0; GIRL=1 ncord3 <- data.frame$ncord3 ncord4 <- data.frame$ncord4 ncord5 <- data.frame$ncord5 ncord6 <- data.frame$ncord6 # SIXTH CHILD: BOY=0; GIRL=1 ######### POSITION OF i^th BOY ################################################ boy1 <- data.frame$boy1 # BIRTH POSITION OF FIRST BOY (ZERO IF THE FAMILY HAS NO BOYS) boy2 <- data.frame$boy2 # BIRTH POSITION OF SECOND BOY (ZERO IF THE FAMILY HAS ONLY ONE BOY) boy3 <- data.frame$boy3 boy4 <- data.frame$boy4 boy5 <- data.frame$boy5 boy6 <- data.frame$boy6 # BIRTH POSITION OF SIXTH BOY (ZERO IF THE FAMILY HAS ONLY FIVE BOYS) ######################## CONDITIONAL PROBABILITIES ########################## qq21 <- 1 qq31 <- 1/(1+exp(zeta[1])) qq32 <- exp(zeta[1])/(1+exp(zeta[1])) qq41 <- 1/(1+exp(zeta[2])+exp(zeta[3])) qq42 <- exp(zeta[2])/(1+exp(zeta[2])+exp(zeta[3])) qq43 <- exp(zeta[3])/(1+exp(zeta[2])+exp(zeta[3])) qq51 <- 1/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6])) qq52 <- exp(zeta[4])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6])) qq53 <- exp(zeta[5])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6])) qq54 <- exp(zeta[6])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6])) qq61 <- 1/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10])) qq62 <- exp(zeta[7])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10])) qq63 <- exp(zeta[8])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10])) qq64 <- exp(zeta[9])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10])) qq65 <- exp(zeta[10])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10])) zeta1 <- c(qq21,qq31,qq32,qq41,qq42,qq43,qq51,qq52,qq53,qq54,qq61,qq62,qq63,qq64,qq65) ######################################################################### n <- length(N) # LENGTH OF SAMPLE; SIZE OF THE MAIN LOOP lglik <- numeric(n) # INITIALIZING THE LOGLIKELIHOOD FUNCTION # CREATES A 1xn VECTOR OF ZEROS for (j in 1:n) { # START OF MAIN LOOP S <- matrix(0, 6, 6) # CREATE A 6x6 MATRIX OF ZEROS y <- numeric(15) # CREATE A 1x15 VECTOR OF ZEROS N1 <- N[j] # DESIRED FAMILY SIZE q <- 1/(1+exp(delta)) # PROBABILITY OF MALE BIRTH if (alive[j]==N1) { for (i in 1:(N1-1)) { S[N1,i] <- (q^(nb[j]))*((1-q)^(ng[j])) } } else { for (i in 1:(N1-1)) { S[N1,i] <- 0 } } ######### CREATE A 1x6 VECTOR WITH POSITION OF BOYS WITHIN FAMILY x <- c(boy1[j], boy2[j], boy3[j], boy4[j], boy5[j]) if (N1>1) { for (i in 1:(N1-1)) { if (alive[j]>x[i] & x[i]>0) { S[N1,i] <- 0 } if (x[i] == alive[j] ) { S[N1,i] <- (q^(nb[j]))*((1-q)^(ng[j])) } } } y <- c(S[2,1],S[3,1],S[3,2],S[4,1],S[4,2],S[4,3],S[5,1],S[5,2],S[5,3],S[5,4],S[6,1],S[6,2],S[6,3],S[6,4],S[6,5]) z1 <- c(age[j],edu[j],work[j],rural[j],poor[j],middle[j],hindu[j]) # DETERMINANTS OF FAMILY SIZE z2 <- c(1,age[j],edu[j],work[j],contracep[j],rural[j],jointfam[j],hindu[j]) # DETERMINANTS OF SON TARGETING t1 <- (indN[j])*((q^(nb[j]))*((1-q)^(ng[j])))*(exp(-exp(sum(z1*gamma)))*((exp(sum(z1*gamma)))^N1)*pnorm(-sum(z2*beta)))/factorial(N1) t2 <- (sum(y*zeta1))*(exp(-exp(sum(z1*gamma) + alpha))*((exp(sum(z1*gamma) + alpha))^N1)*(1-pnorm(-sum(z2*beta)))/factorial(N1)) lglik[j] <- log(t1+t2) } return(-sum(lglik)) # RETURNING THE NEGATIVE OF THE LOGLIKELIHOOD # SUMMED OVER ALL OBSERVATIONS } ------------ end code ---------------------- ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.