For an application of ridge regression, I need to get the covariance matrices of the estimated regression coefficients in addition to the coefficients for all values of the ridge contstant, lambda.

I've studied the code in MASS:::lm.ridge, but don't see how to do this because the code is vectorized using
one svd calculation.  The relevant lines from lm.ridge, using X, Y are:

    Xscale <- drop(rep(1/n, n) %*% X^2)^0.5
    X <- X/rep(Xscale, rep(n, p))
    Xs <- svd(X)
    rhs <- t(Xs$u) %*% Y
    d <- Xs$d
    lscoef <- Xs$v %*% (rhs/d)
    lsfit <- X %*% lscoef
    resid <- Y - lsfit
    ...
    k <- length(lambda)
    dx <- length(d)
    div <- d^2 + rep(lambda, rep(dx, k))
    a <- drop(d * rhs)/div
    dim(a) <- c(dx, k)
    coef <- Xs$v %*% a
    dimnames(coef) <- list(names(Xscale), format(lambda))

Below is a naive, iterative function, which seems correct to me (though it omits the intercept)
but it does not give the same estimated coefficients
as lm.ridge. A test case is included at the end. Can someone help me resolve the discrepancy?

ridge <- function(y, X, lambda=0){
    #dimensions
    n <- nrow(X)
    p <- ncol(X)
    #center X and y
    Xm <- colMeans(X)
    ym <- mean(y)
    X <- X - rep(Xm, rep(n, p))
    y <- y - ym
    #scale X, as in MASS::lm.ridge
    Xscale <- drop(rep(1/n, n) %*% X^2)^0.5
    X <- as.matrix(X/rep(Xscale, rep(n, p)))

    XPX <- crossprod(X)
    XPy <- crossprod(X,y)
    I <- diag(p)
    lmfit <- lm.fit(X, y)
    MSE <- sum(lmfit$residuals^2) / (n-p)

    # prepare output
        coef <- matrix(0, length(lambda), p)
        cov <- as.list(rep(0, length(lambda)))
        mse <- rep(0, length(lambda))

    # loop over lambdas
    for(i in seq(length(lambda))) {
        lam <- lambda[i]
        XPXr <- XPX + lam * I
        XPXI <- solve(XPXr)
        coef[i,] <- XPXI %*% XPy
        cov[[i]] <- MSE * XPXI %*% XPX %*% XPXI
        res <- y - X %*% coef[i,]
        mse[i] <- sum(res^2) / (n-p)
        dimnames(cov[[i]]) <- list(colnames(X), colnames(X))
    }
    dimnames(coef) <- list(format(lambda), colnames(X))
result <- list(lambda=lambda, coef=coef, cov=cov, mse=mse, scales=Xscale)
    class(result) <- "ridge"
    result
}

# Test:

> lambda <- c(0, 0.005, 0.01, 0.02, 0.04, 0.08)
> lambdaf <- c("", ".005", ".01", ".02", ".04", ".08")
> lridge <- ridge(longley.y, longley.X, lambda=lambda)
> lridge$coef
             GNP Unemployed Armed.Forces  Population     Year GNP.deflator
0.000 -3.4471925  -1.827886   -0.6962102 -0.34419721 8.431972   0.15737965
0.005 -1.0424783  -1.491395   -0.6234680 -0.93558040 6.566532  -0.04175039
0.010 -0.1797967  -1.361047   -0.5881396 -1.00316772 5.656287  -0.02612152
0.020  0.4994945  -1.245137   -0.5476331 -0.86755299 4.626116   0.09766305
0.040  0.9059471  -1.155229   -0.5039108 -0.52347060 3.576502   0.32123994
0.080  1.0907048  -1.086421   -0.4582525 -0.08596324 2.641649   0.57025165


> (lmridge <- lm.ridge(Employed ~ GNP + Unemployed + Armed.Forces + Population + Year + GNP.deflator, lambda=lambda, data=longley))
                         GNP  Unemployed Armed.Forces  Population      Year
0.000 -3482.259 -0.035819179 -0.02020230 -0.010332269 -0.05110411 1.8291515
0.005 -2690.238 -0.010832211 -0.01648330 -0.009252720 -0.13890874 1.4244808
0.010 -2307.348 -0.001868237 -0.01504267 -0.008728422 -0.14894365 1.2270208
0.020 -1877.437  0.005190160 -0.01376159 -0.008127275 -0.12880848 1.0035455
0.040 -1442.709  0.009413540 -0.01276791 -0.007478405 -0.07772142 0.7758522
0.080 -1057.555  0.011333324 -0.01200742 -0.006800801 -0.01276325 0.5730541
      GNP.deflator
0.000  0.015061872
0.005 -0.003995682
0.010 -0.002499936
0.020  0.009346751
0.040  0.030743969
0.080  0.054575402
>




--
Michael Friendly     Email: friendly AT yorku DOT ca
Professor, Psychology Dept.
York University      Voice: 416 736-5115 x66249 Fax: 416 736-5814
4700 Keele Street    Web:   http://www.datavis.ca
Toronto, ONT  M3J 1P3 CANADA

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to