Dear R-devel,
I have reworked the `constrOptim' function extensively, both to fix some bugs and to extend its functionality. Please find attached the patch (with comments on the bug fixes and improvements).
I am also attaching a "test" file that demonstrates the differences between the "old" and "new" versions of `constrOptim'.
Best,
Ravi.
____________________________________________________________________
Ravi Varadhan, Ph.D.
Assistant Professor,
Division of Geriatric Medicine and Gerontology
School of Medicine
Johns Hopkins University
Ph. (410) 502-2619
email: [email protected]
----- Original Message -----
From: Martin Maechler <[email protected]>
Date: Tuesday, June 22, 2010 3:33 am
Subject: Re: [Rd] constrOptim( ): conflict between help page and code
To: Ravi Varadhan <[email protected]>
Cc: [email protected]
> Dear Ravi,
> What we (R-core) really want with such postings is a "diff" aka "patch"
> against the R-devel (!) sources,
> which you can get from
>
> You produce a patch with e.g.
>
> diff -ubBw <previousversion>constrOptim.R <myversion>constrOptim.R
>
> send that (possibly as attached file e.g. constrOptim.patch)
> to R-devel
>
> Best regards,
> Martin
--- constrOptim.R 2010-06-22 15:44:28.352204298 -0400
+++ constrOptim_NEW.R 2010-06-22 16:00:38.223490505 -0400
@@ -15,22 +15,28 @@
# http://www.r-project.org/Licenses/
-constrOptim <-
- function(theta, f, grad, ui, ci, mu = 0.0001, control = list(),
- method = if(is.null(grad)) "Nelder-Mead" else "BFGS",
- outer.iterations = 100, outer.eps = 0.00001, ..., hessian = FALSE)
-{
+constrOptim <- function (theta, f, grad=NULL, ui, ci, mu = 1e-04, control = list(),
+    method = c("BFGS", "Nelder-Mead"), outer.iterations = 100, outer.eps = 1e-07, grad.eps = 1e-07, trace.outer = TRUE, ...)
+{
+# Bugs fixed and improvements made by: Ravi Varadhan, Johns Hopkins University, June 22, 2010.
+#
+# The user can call either "Nelder-Mead" or "BFGS"
+# "BFGS" can be called even when the gradient function is not specified
+# in which case, a finite-difference approximation is computed
+#
if (!is.null(control$fnscale) && control$fnscale < 0)
- mu <- -mu ##maximizing
+ mu <- -mu
R <- function(theta, theta.old, ...) {
ui.theta <- ui%*%theta
gi <- ui.theta-ci
- if (any(gi<0)) return(NaN)
+ if (any(gi < 0))
+ return(NaN)
gi.old <- ui%*%theta.old-ci
bar <- sum(gi.old*log(gi)-ui.theta)
- if (!is.finite(bar)) bar <- -Inf
+ if (!is.finite(bar))
+ bar <- -Inf
f(theta, ...)-mu*bar
}
@@ -42,44 +48,84 @@
grad(theta, ...) - mu*dbar
}
+ method <- match.arg(method, choices=c("BFGS", "Nelder-Mead"))
+
+    if (is.null(grad) && method == "BFGS") {
+        # forward-difference approximation to the gradient, with step 'grad.eps'
+        grad <- function(par, ...) {
+            df <- numeric(length(par))
+            fval <- f(par, ...)
+            for (i in seq_along(par)) {
+                dx <- par
+                dx[i] <- dx[i] + grad.eps
+                df[i] <- (f(dx, ...) - fval)/grad.eps
+            }
+            df
+        }
+    }
+
if (any(ui%*%theta-ci <= 0))
- stop("initial value is not in the interior of the feasible region")
+ stop("initial value not feasible")
obj <- f(theta, ...)
r <- R(theta, theta, ...)
- fun <- function(theta, ...) R(theta, theta.old, ...)
- gradient <- if(method == "SANN") {
- if(missing(grad)) NULL else grad
- } else function(theta, ...) dR(theta, theta.old, ...)
+ feval <- 0
+ geval <- 0
- for(i in seq_len(outer.iterations)) {
+ for (i in 1:outer.iterations) {
+ if (trace.outer) {
+ cat("par: ", theta, "\n")
+ cat("fval: ", obj, "\n")
+ }
obj.old <- obj
r.old <- r
theta.old <- theta
-
+ fun <- function(theta, ...) {
+ R(theta, theta.old, ...)
+ }
+ gradient <- function(theta, ...) {
+ dR(theta, theta.old, ...)
+ }
a <- optim(theta.old, fun, gradient, control = control,
- method = method, hessian = hessian, ...)
+ method = method, ...)
r <- a$value
- if (is.finite(r) && is.finite(r.old) &&
- abs(r-r.old)/(outer.eps+abs(r-r.old)) < outer.eps) break
+# The following "relative" convergence criterion is flawed:
+# if (is.finite(r) && is.finite(r.old) && abs(r - r.old)/(outer.eps +
+# abs(r - r.old) ) < outer.eps)
+# break
+# Here is a correct way to do it:
+# if (is.finite(r) && is.finite(r.old) && abs(r - r.old)/(1 +
+# abs(r + r.old)/2 ) < outer.eps)
+# break
+# But we use the simpler "absolute" convergence criterion:
+ if (is.finite(r) && is.finite(r.old) && abs(r - r.old) < outer.eps)
+ break
theta <- a$par
+# We need to accumulate function and gradient evaluation counts
+ feval <- feval + a$counts[1]
+ geval <- geval + a$counts[2]
obj <- f(theta, ...)
- if (obj > obj.old) break
+# The following line is a bug when control$fnscale = -1
+# if (obj > obj.old) break
+# This is a correct, direction-aware version:
+        if (sign(mu) * (obj - obj.old) > 0) break
}
if (i == outer.iterations) {
a$convergence <- 7
-        a$message <- gettext("Barrier algorithm ran out of iterations and did not converge")
+        a$message <- "Barrier algorithm ran out of iterations and did not converge"
}
if (mu > 0 && obj > obj.old) {
a$convergence <- 11
-        a$message <- gettextf("Objective function increased at outer iteration %d", i)
+        a$message <- paste("Objective function increased at outer iteration", i)
}
if (mu < 0 && obj < obj.old) {
a$convergence <- 11
-        a$message <- gettextf("Objective function decreased at outer iteration %d", i)
+        a$message <- paste("Objective function decreased at outer iteration", i)
}
a$outer.iterations <- i
a$barrier.value <- a$value
a$value <- f(a$par, ...)
a$barrier.value <- a$barrier.value - a$value
+ a$counts <- c(feval, geval)
a
}
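To see concretely why the old "relative" criterion was flawed, here is a minimal numeric check (not part of the patch; the values are arbitrary). Because the denominator contains the change abs(r - r.old) itself, the old test only fires once the change falls below roughly outer.eps^2, far below the documented tolerance:

eps <- 1e-05            # the old default for 'outer.eps'
d <- 1e-06              # a change already well below the tolerance
d/(eps + d) < eps       # FALSE: the old criterion does not stop
d < eps                 # TRUE: the intended absolute test stops
eps^2/(1 - eps)         # ~1e-10: the change the old rule actually requires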
--- constrOptim.R 2010-06-22 15:44:28.352204298 -0400
+++ constrOptim_NEW_NC.R 2010-06-22 16:05:03.736204242 -0400
@@ -15,22 +15,23 @@
# http://www.r-project.org/Licenses/
-constrOptim <-
- function(theta, f, grad, ui, ci, mu = 0.0001, control = list(),
- method = if(is.null(grad)) "Nelder-Mead" else "BFGS",
- outer.iterations = 100, outer.eps = 0.00001, ..., hessian = FALSE)
+
+constrOptim <- function (theta, f, grad=NULL, ui, ci, mu = 1e-04, control = list(),
+    method = c("BFGS", "Nelder-Mead"), outer.iterations = 100, outer.eps = 1e-07, grad.eps = 1e-07, trace.outer = TRUE, ...)
{
if (!is.null(control$fnscale) && control$fnscale < 0)
- mu <- -mu ##maximizing
+ mu <- -mu
R <- function(theta, theta.old, ...) {
ui.theta <- ui%*%theta
gi <- ui.theta-ci
- if (any(gi<0)) return(NaN)
+ if (any(gi < 0))
+ return(NaN)
gi.old <- ui%*%theta.old-ci
bar <- sum(gi.old*log(gi)-ui.theta)
- if (!is.finite(bar)) bar <- -Inf
+ if (!is.finite(bar))
+ bar <- -Inf
f(theta, ...)-mu*bar
}
@@ -42,44 +43,71 @@
grad(theta, ...) - mu*dbar
}
+ method <- match.arg(method, choices=c("BFGS", "Nelder-Mead"))
+
+    if (is.null(grad) && method == "BFGS") {
+        # forward-difference approximation to the gradient, with step 'grad.eps'
+        grad <- function(par, ...) {
+            df <- numeric(length(par))
+            fval <- f(par, ...)
+            for (i in seq_along(par)) {
+                dx <- par
+                dx[i] <- dx[i] + grad.eps
+                df[i] <- (f(dx, ...) - fval)/grad.eps
+            }
+            df
+        }
+    }
+
if (any(ui%*%theta-ci <= 0))
- stop("initial value is not in the interior of the feasible region")
+ stop("initial value not feasible")
obj <- f(theta, ...)
r <- R(theta, theta, ...)
- fun <- function(theta, ...) R(theta, theta.old, ...)
- gradient <- if(method == "SANN") {
- if(missing(grad)) NULL else grad
- } else function(theta, ...) dR(theta, theta.old, ...)
+ feval <- 0
+ geval <- 0
- for(i in seq_len(outer.iterations)) {
+ for (i in 1:outer.iterations) {
+ if (trace.outer) {
+ cat("par: ", theta, "\n")
+ cat("fval: ", obj, "\n")
+ }
obj.old <- obj
r.old <- r
theta.old <- theta
-
+ fun <- function(theta, ...) {
+ R(theta, theta.old, ...)
+ }
+ gradient <- function(theta, ...) {
+ dR(theta, theta.old, ...)
+ }
a <- optim(theta.old, fun, gradient, control = control,
- method = method, hessian = hessian, ...)
+ method = method, ...)
r <- a$value
- if (is.finite(r) && is.finite(r.old) &&
- abs(r-r.old)/(outer.eps+abs(r-r.old)) < outer.eps) break
+ if (is.finite(r) && is.finite(r.old) && abs(r - r.old) < outer.eps)
+ break
theta <- a$par
+ feval <- feval + a$counts[1]
+ geval <- geval + a$counts[2]
obj <- f(theta, ...)
- if (obj > obj.old) break
+        if (sign(mu) * (obj - obj.old) > 0) break
}
if (i == outer.iterations) {
a$convergence <- 7
-        a$message <- gettext("Barrier algorithm ran out of iterations and did not converge")
+        a$message <- "Barrier algorithm ran out of iterations and did not converge"
}
if (mu > 0 && obj > obj.old) {
a$convergence <- 11
-        a$message <- gettextf("Objective function increased at outer iteration %d", i)
+        a$message <- paste("Objective function increased at outer iteration", i)
}
if (mu < 0 && obj < obj.old) {
a$convergence <- 11
-        a$message <- gettextf("Objective function decreased at outer iteration %d", i)
+        a$message <- paste("Objective function decreased at outer iteration", i)
}
a$outer.iterations <- i
a$barrier.value <- a$value
a$value <- f(a$par, ...)
a$barrier.value <- a$barrier.value - a$value
+ a$counts <- c(feval, geval)
a
}
# source the function "constrOptim.nl"
source("g:/computing/constrOptim2.txt")
################################
fr <- function(x) { ## Rosenbrock Banana function
x1 <- x[1]
x2 <- x[2]
100 * (x2 - x1 * x1)^2 + (1 - x1)^2
}
grr <- function(x) { ## Gradient of 'fr'
x1 <- x[1]
x2 <- x[2]
c(-400 * x1 * (x2 - x1 * x1) - 2 * (1 - x1), 200 * (x2 - x1 * x1))
}
constrOptim(c(.5,0), fr, grr, ui=rbind(c(-1,0),c(1,-1)), ci=c(-0.9,0.1))
# Note the difference in function and gradient counts
# Also, the number of outer iterations is smaller because of
# a different (and better) termination criterion
constrOptim2(c(.5,0), fr, grr, ui=rbind(c(-1,0),c(1,-1)), ci=c(-0.9,0.1))
################################
# Now we demonstrate the "bug" in constrOptim when maximizing, i.e. when
control$fnscale = -1
fr.neg <- function(x) { ## negative Rosenbrock Banana function
x1 <- x[1]
x2 <- x[2]
-(100 * (x2 - x1 * x1)^2 + (1 - x1)^2 )
}
grr.neg <- function(x) { ## Gradient of 'fr.neg'
x1 <- x[1]
x2 <- x[2]
-c(-400 * x1 * (x2 - x1 * x1) - 2 * (1 - x1), 200 * (x2 - x1 * x1))
}
# This performs only one outer iteration
constrOptim(c(.5,0), fr.neg, grr.neg, ui=rbind(c(-1,0),c(1,-1)),
    ci=c(-0.9,0.1), control=list(fnscale=-1))
# This correctly performs many outer iterations until convergence
constrOptim2(c(.5,0), fr.neg, grr.neg, ui=rbind(c(-1,0),c(1,-1)),
    ci=c(-0.9,0.1), control=list(fnscale=-1))
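# A quick sanity check (not part of the patch; the numbers are arbitrary) that the
# direction-aware break condition sign(mu) * (obj - obj.old) > 0 stops only when
# the objective moves the wrong way:
mu <- 1e-04;  sign(mu) * (5 - 4) > 0       # TRUE: obj rose while minimizing -> break
mu <- -1e-04; sign(mu) * (-6 - (-5)) > 0   # TRUE: obj fell while maximizing -> break
mu <- -1e-04; sign(mu) * (-4 - (-5)) > 0   # FALSE: obj improved -> keep iterating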
################################
# Now we demonstrate the use of the numerical gradient
# (fr and grr are as defined above)
# Nelder-Mead is used when gradient is not specified
constrOptim(c(.5,0), fr, grad=NULL, ui=rbind(c(-1,0),c(1,-1)), ci=c(-0.9,0.1))
# BFGS algorithm is used even though gradient is not specified
constrOptim2(c(.5,0), fr, ui=rbind(c(-1,0),c(1,-1)), ci=c(-0.9,0.1))
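# A side note on the numerical gradient: the patch uses a forward difference with
# a fixed step grad.eps, whose truncation error is O(grad.eps). If more accuracy is
# wanted, a central-difference variant is a straightforward alternative, at the cost
# of one extra function evaluation per coordinate. A hypothetical sketch (num.grad
# is not part of the patch):
num.grad <- function(f, par, h = 1e-07, ...) {
    # central-difference gradient: O(h^2) truncation error instead of O(h)
    df <- numeric(length(par))
    for (i in seq_along(par)) {
        dp <- dm <- par
        dp[i] <- dp[i] + h
        dm[i] <- dm[i] - h
        df[i] <- (f(dp, ...) - f(dm, ...))/(2 * h)
    }
    df
}
# e.g. num.grad(fr, c(0.5, 0)) should agree closely with grr(c(0.5, 0))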
______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel