Author: psteitz Date: Sun Aug 22 13:13:35 2010 New Revision: 987897 URL: http://svn.apache.org/viewvc?rev=987897&view=rev Log: Corrected Y variance formula and added error variance methods to return what were previously reported as Y variances. JIRA: MATH-392 Reported and patched by Mark Devaney
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java commons/proper/math/trunk/src/site/xdoc/changes.xml commons/proper/math/trunk/src/test/R/multipleOLSRegressionTestCases commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java?rev=987897&r1=987896&r2=987897&view=diff ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java (original) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java Sun Aug 22 13:13:35 2010 @@ -22,6 +22,7 @@ import org.apache.commons.math.linear.Re import org.apache.commons.math.linear.Array2DRowRealMatrix; import org.apache.commons.math.linear.RealVector; import org.apache.commons.math.linear.ArrayRealVector; +import org.apache.commons.math.stat.descriptive.moment.Variance; /** * Abstract base class for implementations of MultipleLinearRegression. @@ -148,7 +149,8 @@ public abstract class AbstractMultipleLi */ public double[] estimateRegressionParametersStandardErrors() { double[][] betaVariance = estimateRegressionParametersVariance(); - double sigma = calculateYVariance(); + RealVector residuals = calculateResiduals(); + double sigma = calculateErrorVariance(); int length = betaVariance[0].length; double[] result = new double[length]; for (int i = 0; i < length; i++) { @@ -165,6 +167,25 @@ public abstract class AbstractMultipleLi } /** + * Estimates the variance of the error. + * + * @return estimate of the error variance + */ + public double estimateErrorVariance() { + return calculateErrorVariance(); + + } + + /** + * Estimates the standard error of the regression. + * + * @return regression standard error + */ + public double estimateRegressionStandardError() { + return Math.sqrt(estimateErrorVariance()); + } + + /** * Calculates the beta of multiple linear regression in matrix notation. * * @return beta @@ -179,12 +200,31 @@ public abstract class AbstractMultipleLi */ protected abstract RealMatrix calculateBetaVariance(); + /** - * Calculates the Y variance of multiple linear regression. + * Calculates the variance of the y values. * * @return Y variance */ - protected abstract double calculateYVariance(); + protected double calculateYVariance() { + return new Variance().evaluate(Y.getData()); + } + + /** + * <p>Calculates the variance of the error term.</p> + * Uses the formula <pre> + * var(u) = u · u / (n - k) + * </pre> + * where n and k are the row and column dimensions of the design + * matrix X. + * + * @return error variance estimate + */ + protected double calculateErrorVariance() { + RealVector residuals = calculateResiduals(); + return residuals.dotProduct(residuals) / + (X.getRowDimension() - X.getColumnDimension()); + } /** * Calculates the residuals of multiple linear regression in matrix Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java?rev=987897&r1=987896&r2=987897&view=diff ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java (original) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java Sun Aug 22 13:13:35 2010 @@ -21,7 +21,6 @@ import org.apache.commons.math.linear.Re import org.apache.commons.math.linear.Array2DRowRealMatrix; import org.apache.commons.math.linear.RealVector; - /** * The GLS implementation of the multiple linear regression. * @@ -101,7 +100,7 @@ public class GLSMultipleLinearRegression } /** - * Calculates the variance on the beta by GLS. + * Calculates the variance on the beta. * <pre> * Var(b)=(X' Omega^-1 X)^-1 * </pre> @@ -114,18 +113,23 @@ public class GLSMultipleLinearRegression return new LUDecompositionImpl(XTOIX).getSolver().getInverse(); } + /** - * Calculates the variance on the y by GLS. + * Calculates the estimated variance of the error term using the formula * <pre> - * Var(y)=Tr(u' Omega^-1 u)/(n-k) + * Var(u) = Tr(u' Omega^-1 u)/(n-k) * </pre> - * @return The Y variance + * where n and k are the row and column dimensions of the design + * matrix X. + * + * @return error variance */ @Override - protected double calculateYVariance() { + protected double calculateErrorVariance() { RealVector residuals = calculateResiduals(); double t = residuals.dotProduct(getOmegaInverse().operate(residuals)); return t / (X.getRowDimension() - X.getColumnDimension()); + } } Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java?rev=987897&r1=987896&r2=987897&view=diff ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java (original) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java Sun Aug 22 13:13:35 2010 @@ -49,7 +49,7 @@ import org.apache.commons.math.linear.Re * (R<sup>T</sup>)<sup>-1</sup> R<sup>T</sup> R b = (R<sup>T</sup>)<sup>-1</sup> R<sup>T</sup> Q<sup>T</sup> y <br/> * R b = Q<sup>T</sup> y * </p> - * Given Q and R, the last equation is solved by back-subsitution.</p> + * Given Q and R, the last equation is solved by back-substitution.</p> * * @version $Revision$ $Date$ * @since 2.0 @@ -161,19 +161,4 @@ public class OLSMultipleLinearRegression return Rinv.multiply(Rinv.transpose()); } - - /** - * <p>Calculates the variance on the Y by OLS. - * </p> - * <p> Var(y) = Tr(u<sup>T</sup>u)/(n - k) - * </p> - * @return The Y variance - */ - @Override - protected double calculateYVariance() { - RealVector residuals = calculateResiduals(); - return residuals.dotProduct(residuals) / - (X.getRowDimension() - X.getColumnDimension()); - } - } Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=987897&r1=987896&r2=987897&view=diff ============================================================================== --- commons/proper/math/trunk/src/site/xdoc/changes.xml (original) +++ commons/proper/math/trunk/src/site/xdoc/changes.xml Sun Aug 22 13:13:35 2010 @@ -52,6 +52,13 @@ The <action> type attribute can be add,u If the output is not quite correct, check for invisible trailing spaces! --> <release version="2.2" date="TBD" description="TBD"> + <action dev="psteitz" type="fix" issue="MATH-392" due-to="Mark Devaney"> + Corrected the formula used for Y variance returned by calculateYVariance and associated + methods in multiple regression classes (AbstractMultipleLinearRegression, + OLSMultipleLinearRegression, GLSMultipleLinearRegression). These methods previously returned + estimates of the variance in the model error term. New "calulateErrorVariance" methods have + been added to compute what was previously returned by calculateYVariance. + </action> <action dev="dimpbx" type="fix" issue="MATH-406"> Bug fixed in Levenberg-Marquardt (handling of weights). </action> Modified: commons/proper/math/trunk/src/test/R/multipleOLSRegressionTestCases URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/multipleOLSRegressionTestCases?rev=987897&r1=987896&r2=987897&view=diff ============================================================================== --- commons/proper/math/trunk/src/test/R/multipleOLSRegressionTestCases (original) +++ commons/proper/math/trunk/src/test/R/multipleOLSRegressionTestCases Sun Aug 22 13:13:35 2010 @@ -32,10 +32,11 @@ options(digits=16) # override # function to verify OLS computations verifyRegression <- function(model, expectedBeta, expectedResiduals, - expectedErrors, modelName) { + expectedErrors, expectedStdError, modelName) { betaHat <- as.vector(coefficients(model)) residuals <- as.vector(residuals(model)) errors <- as.vector(as.matrix(coefficients(summary(model)))[,2]) + stdError <- summary(model)$sigma output <- c("Parameter test dataset = ", modelName) if (assertEquals(expectedBeta,betaHat,tol,"Parameters")) { displayPadded(output, SUCCEEDED, WIDTH) @@ -53,7 +54,13 @@ verifyRegression <- function(model, expe displayPadded(output, SUCCEEDED, WIDTH) } else { displayPadded(output, FAILED, WIDTH) - } + } + output <- c("Standard Error test dataset = ", modelName) + if (assertEquals(expectedStdError,stdError,tol,"Regression Standard Error")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } } #-------------------------------------------------------------------------- @@ -70,7 +77,8 @@ model <- lm(y ~ x1 + x2 + x3 + x4 + x5) expectedBeta <- c(11.0,0.5,0.666666666666667,0.75,0.8,0.8333333333333333) expectedResiduals <- c(0,0,0,0,0,0) expectedErrors <- c(NaN,NaN,NaN,NaN,NaN,NaN) -verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, +expectedStdError <- NaN +verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, expectedStdError, "perfect fit") # Longly @@ -125,8 +133,9 @@ expectedResiduals <- c( 267.340029759711 -13.18035686637081,14.30477260005235,455.394094551857,-17.26892711483297, -39.0550425226967,-155.5499735953195,-85.6713080421283,341.9315139607727, -206.7578251937366) +expectedStdError <- 304.8540735619638 -verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, +verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, expectedStdError, "Longly") # Swiss Fertility (R dataset named "swiss") @@ -215,7 +224,9 @@ expectedResiduals <- c(7.104426785973051 15.0147574652763112,4.8625103516321015,-7.1597256413907706, -0.4515205619767598,-10.2916870903837587,-15.7812984571900063) -verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, +expectedStdError <- 7.73642194433223 + +verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, expectedStdError, "Swiss Fertility") Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java?rev=987897&r1=987896&r2=987897&view=diff ============================================================================== --- commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java (original) +++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegressionTest.java Sun Aug 22 13:13:35 2010 @@ -18,6 +18,8 @@ package org.apache.commons.math.stat.reg import org.junit.Before; import org.junit.Test; +import org.apache.commons.math.TestUtils; +import org.apache.commons.math.stat.StatUtils; public class GLSMultipleLinearRegressionTest extends MultipleLinearRegressionAbstractTest { @@ -121,4 +123,16 @@ public class GLSMultipleLinearRegression return y.length; } + /** + * test calculateYVariance + */ + @Test + public void testYVariance() { + + // assumes: y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0}; + + GLSMultipleLinearRegression model = new GLSMultipleLinearRegression(); + model.newSampleData(y, x, omega); + TestUtils.assertEquals(model.calculateYVariance(), 3.5, 0); + } } Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java?rev=987897&r1=987896&r2=987897&view=diff ============================================================================== --- commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java (original) +++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegressionTest.java Sun Aug 22 13:13:35 2010 @@ -24,6 +24,7 @@ import org.apache.commons.math.linear.Ma import org.apache.commons.math.linear.MatrixVisitorException; import org.apache.commons.math.linear.RealMatrix; import org.apache.commons.math.linear.Array2DRowRealMatrix; +import org.apache.commons.math.stat.StatUtils; import org.junit.Before; import org.junit.Test; @@ -82,7 +83,7 @@ public class OLSMultipleLinearRegression } @Test - public void testPerfectFit() { + public void testPerfectFit() throws Exception { double[] betaHat = regression.estimateRegressionParameters(); TestUtils.assertEquals(betaHat, new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 }, @@ -108,6 +109,7 @@ public class OLSMultipleLinearRegression assertEquals(0.0, errors.subtract(referenceVariance).getNorm(), 5.0e-16 * referenceVariance.getNorm()); + } @@ -122,7 +124,7 @@ public class OLSMultipleLinearRegression * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat */ @Test - public void testLongly() { + public void testLongly() throws Exception { // Y values are first, then independent vars // Each row is one observation double[] design = new double[] { @@ -180,6 +182,11 @@ public class OLSMultipleLinearRegression 0.214274163161675, 0.226073200069370, 455.478499142212}, errors, 1E-6); + + // Check regression standard error against R + assertEquals(304.8540735619638, model.estimateRegressionStandardError(), 1E-10); + + checkVarianceConsistency(model); } /** @@ -187,7 +194,7 @@ public class OLSMultipleLinearRegression * Data Source: R datasets package */ @Test - public void testSwissFertility() { + public void testSwissFertility() throws Exception { double[] design = new double[] { 80.2,17.0,15,12,9.96, 83.1,45.1,6,9,84.84, @@ -283,6 +290,11 @@ public class OLSMultipleLinearRegression 0.27410957467466, 0.19454551679325, 0.03726654773803}, errors, 1E-10); + + // Check regression standard error against R + assertEquals(7.73642194433223, model.estimateRegressionStandardError(), 1E-12); + + checkVarianceConsistency(model); } /** @@ -354,4 +366,34 @@ public class OLSMultipleLinearRegression double[] hatResiduals = I.subtract(hat).operate(model.Y).getData(); TestUtils.assertEquals(residuals, hatResiduals, 10e-12); } + + /** + * test calculateYVariance + */ + @Test + public void testYVariance() { + + // assumes: y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0}; + + OLSMultipleLinearRegression model = new OLSMultipleLinearRegression(); + model.newSampleData(y, x); + TestUtils.assertEquals(model.calculateYVariance(), 3.5, 0); + } + + /** + * Verifies that calculateYVariance and calculateResidualVariance return consistent + * values with direct variance computation from Y, residuals, respectively. + */ + protected void checkVarianceConsistency(OLSMultipleLinearRegression model) throws Exception { + // Check Y variance consistency + TestUtils.assertEquals(StatUtils.variance(model.Y.getData()), model.calculateYVariance(), 0); + + // Check residual variance consistency + double[] residuals = model.calculateResiduals().getData(); + RealMatrix X = model.X; + TestUtils.assertEquals( + StatUtils.variance(model.calculateResiduals().getData()) * (residuals.length - 1), + model.calculateErrorVariance() * (X.getRowDimension() - X.getColumnDimension()), 1E-20); + + } }