Hi Tim,

There are two main problems with your implementation of Stochastic gradient 
algorithm:

1.  You are only implementing one cycle of the algorithm, i.e. it cycles over 
each data point only once.  You need to do this several time, until convergence 
of parameters is obtained.
2.  Stochastic gradient algorithm has very slow convergence.  It can be really 
slow if the predictors are not scaled properly.  

I am attaching a code that takes care of (1) and (2).  It gives results that 
are in good agreement with glm() results.  Beware that it is still very slow.  

This seems like your homework assignment.  If so, you should acknowledge that 
you got help from the R group.  

Ravi.
____________________________________________________________________

Ravi Varadhan, Ph.D.
Assistant Professor,
Division of Geriatric Medicine and Gerontology
School of Medicine
Johns Hopkins University

Ph. (410) 502-2619
email: rvarad...@jhmi.edu


----- Original Message -----
From: Tim LIU <timothy.sli...@gmail.com>
Date: Sunday, April 26, 2009 7:41 am
Subject: [R]  Stochastic Gradient Ascent for logistic regression
To: r-help@r-project.org


>  
>  
>  Hi. guys,
>  
>  I am trying to write my own Stochastic Gradient Ascent for logistic 
>  regression in R. But it seems that I am having convergence problem.
>  
>  Am I doing anything wrong, or just the data is off?
>  
>  Here is my code in R -
>  
>  
>  
>  lbw <-
>  read.table(""
>  , header=TRUE)
>  
>  attach(lbw)
>  
>  
>  
>  lbw[1:2,]
>  low age lwt race smoke ptl ht ui ftv bwt
>  1 0 19 182 2 0 0 0 1 0 2523
>  2 0 33 155 3 0 0 0 0 3 2551
>  
>  
>  
>  
>  #-----R implementation of logistic regression : gradient descent ------
>  sigmoid<-function(z)
>  {
>  1/(1 + exp(-1*z))
>  
>  }
>  
>  
>  
>  
>  X<-cbind(age,lwt, smoke, ht, ui)
>  
>  #y<-low
>  
>  
>  my_logistic<-function(X,y)
>  {
>  
>  alpha <- 0.005
>  n<-5 
>  m<-189
>  max_iters <- 189 #number of obs
>  
>  ll<-0
>  
>  X<-cbind(1,X)
>  
>  theta <-rep(0,6) # intercept and 5 regerssors
>  #theta <- c(1.39, -0.034, -0.01, 0.64, 1.89, 0.88) #glm estimates as 
> 
>  starting values
>  theta_all<-theta
>  for (i in 1:max_iters) 
>  { 
>  dim(X)
>  length(theta)
>  hx <- sigmoid(X %*% theta) # matrix 
>  product
>  
>  ix<-i
>  
>  for (j in 1:6)
>  {
>  theta[j] <- theta[j] + alpha * ((y-hx)[ix]) * X[ix,j] 
>  #stochastic gradient !
>  
>  }
>  
>  
>  
>  
>  
>  logl <- sum( y * log(hx) + (1 - y) * log(1 - hx) ) #direct 
>  multiplication
>  
>  ll<-rbind(ll, logl)
>  
>  
>  theta_all = cbind(theta_all,theta)
>  }
>  
>  par(mfrow=c(4,2))
>  
>  
>  plot(na.omit(ll[,1]))
>  lines(ll[,1])
>  
>  for (j in 1:6)
>  {
>  
>  plot(theta_all[j,])
>  lines(theta_all[j,])
>  } 
>  
>  
>  #theta_all
>  #ll
>  cbind(ll,t(theta_all))
>  }
>  
>  
>  my_logistic(X,low)
>  ==============
>  
>  
>  parameter estimates values jumped after 130+ iterations...
>  
>  not converging even when I use parameter estimates as starting values 
> 
>  from glm (family=binomial)
>  
>  
>  help!
>  
>  
>  
>  
>  -- 
>  View this message in context: 
>  Sent from the R help mailing list archive at Nabble.com.
>  
>  ______________________________________________
>  R-help@r-project.org mailing list
>  
>  PLEASE do read the posting guide 
>  and provide commented, minimal, self-contained, reproducible code. 
lbw <- 
read.table("http://www.biostat.jhsph.edu/~ririzarr/Teaching/754/lbw.dat";, 
header=TRUE)
attach(lbw)
lbw[1:2,]
low age lwt race smoke ptl ht ui ftv bwt
1 0 19 182 2 0 0 0 1 0 2523
2 0 33 155 3 0 0 0 0 3 2551


#-----R implementation of logistic regression : gradient descent ------
sigmoid <- function(z) 1/(1 + exp(-1*z))

X <- cbind(age,lwt, smoke, ht, ui)
X.orig <- X
X <- scale(X.orig) # scaling improves convergence

my.logistic<-function(par, X,y, alpha, plot=FALSE)
{

n <- ncol(X)
m <- nrow(X)
ll<- rep(NA, m)
theta_all <- matrix(NA, n, m)

X<-cbind(1,X)
#theta <- c(1.39, -0.034, -0.01, 0.64, 1.89, 0.88) #glm estimates as starting 
values
theta_all<-theta
for (i in 1:m) 
{ 
dim(X)
length(theta)
hx <- sigmoid(X %*% theta) # matrix product
theta <- theta + alpha * (y - hx)[i] * X[i, ]
logl <- sum( y * log(hx) + (1 - y) * log(1 - hx) ) #direct multiplication

ll[i] <- logl

theta_all = cbind(theta_all, theta)
}

if(plot) {
        par(mfrow=c(4,2))
        plot(na.omit(ll))
        lines(ll[1:i])

        for (j in 1:6)
        {
        plot(theta_all[j, 1:i])
        lines(theta_all[j, 1:i])
        } 
}

return(list(par=theta, loglik=logl))
}

theta <-rep(0,6) # intercept and 5 regerssors
delta <- 1
while (delta > 0.0001) {
ans <- my.logistic(theta, X,low, alpha=0.0005,plot=TRUE)
theta.new <- ans$par
delta <- max(abs(theta - theta.new))
theta <- theta.new
}
ans  # you can multiply coefficients by std. dev to get back original coeffs  

==============
______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to