Author: psteitz Date: Sun May 24 05:04:21 2009 New Revision: 778085 URL: http://svn.apache.org/viewvc?rev=778085&view=rev Log: Added support for Spearman's rank correlation. JIRA: MATH-136 Thanks to John Gant
Added: commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java Modified: commons/proper/math/trunk/pom.xml commons/proper/math/trunk/src/site/xdoc/changes.xml commons/proper/math/trunk/src/test/R/correlationTestCases Modified: commons/proper/math/trunk/pom.xml URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/pom.xml?rev=778085&r1=778084&r2=778085&view=diff ============================================================================== --- commons/proper/math/trunk/pom.xml (original) +++ commons/proper/math/trunk/pom.xml Sun May 24 05:04:21 2009 @@ -127,6 +127,9 @@ <name>Ted Dunning</name> </contributor> <contributor> + <name>John Gant</name> + </contributor> + <contributor> <name>Ken Geis</name> </contributor> <contributor> Added: commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java?rev=778085&view=auto ============================================================================== --- commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java (added) +++ commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java Sun May 24 05:04:21 2009 @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math.stat.correlation; + +import org.apache.commons.math.MathRuntimeException; +import org.apache.commons.math.linear.DenseRealMatrix; +import org.apache.commons.math.linear.RealMatrix; +import org.apache.commons.math.stat.ranking.NaturalRanking; +import org.apache.commons.math.stat.ranking.RankingAlgorithm; + +/** + * <p>Spearman's rank correlation. This implementation performs a rank + * transformation on the input data and then computes {...@link PearsonsCorrelation} + * on the ranked data.</p> + * + * <p>By default, ranks are computed using {...@link NaturalRanking} with default + * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged). + * The ranking algorithm can be set using a constructor argument.</p> + * + * @since 2.0 + * @version $Revision:$ $Date:$ + */ + +public class SpearmansCorrelation { + + /** Input data */ + private final RealMatrix data; + + /** Ranking algorithm */ + private final RankingAlgorithm rankingAlgorithm; + + /** Rank correlation */ + private final PearsonsCorrelation rankCorrelation; + + /** + * Create a SpearmansCorrelation with the given input data matrix + * and ranking algorithm. 
+ * + * @param dataMatrix matrix of data with columns representing + * variables to correlate + * @param rankingAlgorithm ranking algorithm + */ + public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) { + this.data = dataMatrix.copy(); + this.rankingAlgorithm = rankingAlgorithm; + rankTransform(data); + rankCorrelation = new PearsonsCorrelation(data); + } + + /** + * Create a SpearmansCorrelation from the given data matrix. + * + * @param dataMatrix matrix of data with columns representing + * variables to correlate + */ + public SpearmansCorrelation(final RealMatrix dataMatrix) { + this(dataMatrix, new NaturalRanking()); + } + + /** + * Create a SpearmansCorrelation without data. + */ + public SpearmansCorrelation() { + data = null; + this.rankingAlgorithm = new NaturalRanking(); + rankCorrelation = null; + } + + /** + * Calculate the Spearman Rank Correlation Matrix. + * + * @return Spearman Rank Correlation Matrix + */ + public RealMatrix getCorrelationMatrix() { + return rankCorrelation.getCorrelationMatrix(); + } + + /** + * Returns a {@link PearsonsCorrelation} instance constructed from the + * ranked input data. That is, + * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code> + * is equivalent to + * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where + * <code>rankTransform(matrix)</code> is the result of applying the + * configured <code>RankingAlgorithm</code> to each of the columns of + * <code>matrix.</code> + * + * @return PearsonsCorrelation among ranked column data + */ + public PearsonsCorrelation getRankCorrelation() { + return rankCorrelation; + } + + /** + * Computes the Spearman's rank correlation matrix for the columns of the + * input matrix. 
+ * + * @param matrix matrix with columns representing variables to correlate + * @return correlation matrix + */ + public RealMatrix computeCorrelationMatrix(RealMatrix matrix) { + RealMatrix matrixCopy = matrix.copy(); + rankTransform(matrixCopy); + return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy); + } + + /** + * Computes the Spearman's rank correlation matrix for the columns of the + * input rectangular array. The columns of the array represent values + * of variables to be correlated. + * + * @param data matrix with columns representing variables to correlate + * @return correlation matrix + */ + public RealMatrix computeCorrelationMatrix(double[][] data) { + return computeCorrelationMatrix(new DenseRealMatrix(data)); + } + + /** + * Computes the Spearman's rank correlation coefficient between the two arrays. + * + * <p>Throws IllegalArgumentException if the arrays do not have the same length + * or their common length is less than 2</p> + * + * @param xArray first data array + * @param yArray second data array + * @return Returns Spearman's rank correlation coefficient for the two arrays + * @throws IllegalArgumentException if the arrays lengths do not match or + * there is insufficient data + */ + public double correlation(final double[] xArray, final double[] yArray) + throws IllegalArgumentException { + if (xArray.length == yArray.length && xArray.length > 1) { + return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray), + rankingAlgorithm.rank(yArray)); + } + else { + throw MathRuntimeException.createIllegalArgumentException( + "invalid array dimensions. 
xArray has size {0}; yArray has {1} elements", + xArray.length, yArray.length); + } + } + + /** + * Applies rank transform to each of the columns of <code>matrix</code> + * using the current <code>rankingAlgorithm</code> + * + * @param matrix matrix to transform + */ + private void rankTransform(RealMatrix matrix) { + for (int i = 0; i < matrix.getColumnDimension(); i++) { + matrix.setColumn(i, rankingAlgorithm.rank(matrix.getColumn(i))); + } + } +} Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=778085&r1=778084&r2=778085&view=diff ============================================================================== --- commons/proper/math/trunk/src/site/xdoc/changes.xml (original) +++ commons/proper/math/trunk/src/site/xdoc/changes.xml Sun May 24 05:04:21 2009 @@ -39,6 +39,9 @@ </properties> <body> <release version="2.0" date="TBD" description="TBD"> + <action dev="psteitz" type="add" issue="MATH-136" due-to="John Gant"> + Added Spearman's rank correlation (SpearmansCorrelation). + </action> <action dev="psteitz" type="add"> Added support for rank transformations. 
</action> Modified: commons/proper/math/trunk/src/test/R/correlationTestCases URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/correlationTestCases?rev=778085&r1=778084&r2=778085&view=diff ============================================================================== --- commons/proper/math/trunk/src/test/R/correlationTestCases (original) +++ commons/proper/math/trunk/src/test/R/correlationTestCases Sun May 24 05:04:21 2009 @@ -29,11 +29,22 @@ source("testFunctions") # utility test functions options(digits=16) # override number of digits displayed -# function to verify correlation computations -verifyCorrelation <- function(matrix, expectedCorrelation, name) { +# Verify Pearson's correlation +verifyPearsonsCorrelation <- function(matrix, expectedCorrelation, name) { correlation <- cor(matrix) - output <- c("Correlation matrix test dataset = ", name) - if (assertEquals(expectedCorrelation, correlation,tol,"Correlations")) { + output <- c("Pearson's Correlation matrix test dataset = ", name) + if (assertEquals(expectedCorrelation, correlation,tol,"Pearson's Correlations")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +# Verify Spearman's correlation +verifySpearmansCorrelation <- function(matrix, expectedCorrelation, name) { + correlation <- cor(matrix, method="spearman") + output <- c("Spearman's Correlation matrix test dataset = ", name) + if (assertEquals(expectedCorrelation, correlation,tol,"Spearman's Correlations")) { displayPadded(output, SUCCEEDED, WIDTH) } else { displayPadded(output, FAILED, WIDTH) @@ -94,6 +105,7 @@ 70551,116.9,554894,4007,2827,130081,1962), nrow = 16, ncol = 7, byrow = TRUE) +# Pearson's expectedCorrelation <- matrix(c( 1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942, 0.4573073999764817, 0.960390571594376, 0.9713294591921188, @@ -110,7 +122,7 @@ 0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746, 
0.4172451498349454, 0.993952846232926, 1.0000000000000000), nrow = 7, ncol = 7, byrow = TRUE) - verifyCorrelation(longley, expectedCorrelation, "longley") + verifyPearsonsCorrelation(longley, expectedCorrelation, "longley") expectedPValues <- c( 4.38904690369668e-10, @@ -121,6 +133,19 @@ 3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15) verifyPValues(longley, expectedPValues, "longley") + # Spearman's +expectedCorrelation <- matrix(c( + 1, 0.982352941176471, 0.985294117647059, 0.564705882352941, 0.2264705882352941, 0.976470588235294, + 0.976470588235294, 0.982352941176471, 1, 0.997058823529412, 0.664705882352941, 0.2205882352941176, + 0.997058823529412, 0.997058823529412, 0.985294117647059, 0.997058823529412, 1, 0.638235294117647, + 0.2235294117647059, 0.9941176470588236, 0.9941176470588236, 0.564705882352941, 0.664705882352941, + 0.638235294117647, 1, -0.3411764705882353, 0.685294117647059, 0.685294117647059, 0.2264705882352941, + 0.2205882352941176, 0.2235294117647059, -0.3411764705882353, 1, 0.2264705882352941, 0.2264705882352941, + 0.976470588235294, 0.997058823529412, 0.9941176470588236, 0.685294117647059, 0.2264705882352941, 1, 1, + 0.976470588235294, 0.997058823529412, 0.9941176470588236, 0.685294117647059, 0.2264705882352941, 1, 1), + nrow = 7, ncol = 7, byrow = TRUE) + verifySpearmansCorrelation(longley, expectedCorrelation, "longley") + # Swiss Fertility fertility <- matrix(c(80.2,17.0,15,12,9.96, @@ -171,15 +196,14 @@ 44.7,46.6,16,29,50.43, 42.8,27.7,22,29,58.33), nrow = 47, ncol = 5, byrow = TRUE) - -expectedCorrelation <- matrix(c( - 1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691, 0.4636847006517939, - 0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201, 0.4010950530487398, - -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830, -0.5727418060641666, - -0.6637888570350691, 
-0.6395225189483201, 0.6984152962884830, 1.0000000000000000, -0.1538589170909148, - 0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148, 1.0000000000000000), + expectedCorrelation <- matrix(c( + 1, 0.3530791836199747, -0.6458827064572875, -0.663788857035069, 0.463684700651794, + 0.3530791836199747, 1, -0.6865422086171366, -0.63952251894832, 0.4010950530487398, + -0.6458827064572875, -0.6865422086171366, 1, 0.698415296288483, -0.572741806064167, + -0.663788857035069, -0.63952251894832, 0.698415296288483, 1, -0.1538589170909148, + 0.463684700651794, 0.4010950530487398, -0.572741806064167, -0.1538589170909148, 1), nrow = 5, ncol = 5, byrow = TRUE) -verifyCorrelation(fertility, expectedCorrelation, "swiss fertility") +verifyPearsonsCorrelation(fertility, expectedCorrelation, "swiss fertility") expectedPValues <- c( 0.01491720061472623, @@ -188,4 +212,14 @@ 0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 0.301807756132683) verifyPValues(fertility, expectedPValues, "swiss fertility") +# Spearman's +expectedCorrelation <- matrix(c( + 1, 0.2426642769364176, -0.660902996352354, -0.443257690360988, 0.4136455623012432, + 0.2426642769364176, 1, -0.598859938748963, -0.650463814145816, 0.2886878090882852, + -0.660902996352354, -0.598859938748963, 1, 0.674603831406147, -0.4750575257171745, + -0.443257690360988, -0.650463814145816, 0.674603831406147, 1, -0.1444163088302244, + 0.4136455623012432, 0.2886878090882852, -0.4750575257171745, -0.1444163088302244, 1), + nrow = 5, ncol = 5, byrow = TRUE) + verifySpearmansCorrelation(fertility, expectedCorrelation, "swiss fertility") + displayDashes(WIDTH) Added: commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java?rev=778085&view=auto 
============================================================================== --- commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java (added) +++ commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java Sun May 24 05:04:21 2009 @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.correlation; + +import org.apache.commons.math.TestUtils; +import org.apache.commons.math.linear.DenseRealMatrix; +import org.apache.commons.math.linear.RealMatrix; + +/** + * Test cases for Spearman's rank correlation + * + * @since 2.0 + * @version $Revision:$ $Date:$ + */ +public class SpearmansRankCorrelationTest extends PearsonsCorrelationTest { + + protected void setUp() throws Exception { + super.setUp(); + } + + protected void tearDown() throws Exception { + super.tearDown(); + } + + /** + * Test Longley dataset against R. 
+ */ + public void testLongly() throws Exception { + RealMatrix matrix = createRealMatrix(longleyData, 16, 7); + SpearmansCorrelation corrInstance = new SpearmansCorrelation(matrix); + RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix(); + double[] rData = new double[] { + 1, 0.982352941176471, 0.985294117647059, 0.564705882352941, 0.2264705882352941, 0.976470588235294, + 0.976470588235294, 0.982352941176471, 1, 0.997058823529412, 0.664705882352941, 0.2205882352941176, + 0.997058823529412, 0.997058823529412, 0.985294117647059, 0.997058823529412, 1, 0.638235294117647, + 0.2235294117647059, 0.9941176470588236, 0.9941176470588236, 0.564705882352941, 0.664705882352941, + 0.638235294117647, 1, -0.3411764705882353, 0.685294117647059, 0.685294117647059, 0.2264705882352941, + 0.2205882352941176, 0.2235294117647059, -0.3411764705882353, 1, 0.2264705882352941, 0.2264705882352941, + 0.976470588235294, 0.997058823529412, 0.9941176470588236, 0.685294117647059, 0.2264705882352941, 1, 1, + 0.976470588235294, 0.997058823529412, 0.9941176470588236, 0.685294117647059, 0.2264705882352941, 1, 1 + }; + TestUtils.assertEquals("Spearman's correlation matrix", createRealMatrix(rData, 7, 7), correlationMatrix, 10E-15); + } + + /** + * Test R swiss fertility dataset. 
+ */ + public void testSwiss() throws Exception { + RealMatrix matrix = createRealMatrix(swissData, 47, 5); + SpearmansCorrelation corrInstance = new SpearmansCorrelation(matrix); + RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix(); + double[] rData = new double[] { + 1, 0.2426642769364176, -0.660902996352354, -0.443257690360988, 0.4136455623012432, + 0.2426642769364176, 1, -0.598859938748963, -0.650463814145816, 0.2886878090882852, + -0.660902996352354, -0.598859938748963, 1, 0.674603831406147, -0.4750575257171745, + -0.443257690360988, -0.650463814145816, 0.674603831406147, 1, -0.1444163088302244, + 0.4136455623012432, 0.2886878090882852, -0.4750575257171745, -0.1444163088302244, 1 + }; + TestUtils.assertEquals("Spearman's correlation matrix", createRealMatrix(rData, 5, 5), correlationMatrix, 10E-15); + } + + /** + * Constant column + */ + public void testConstant() { + double[] noVariance = new double[] {1, 1, 1, 1}; + double[] values = new double[] {1, 2, 3, 4}; + assertTrue(Double.isNaN(new SpearmansCorrelation().correlation(noVariance, values))); + } + + /** + * Insufficient data + */ + public void testInsufficientData() { + double[] one = new double[] {1}; + double[] two = new double[] {2}; + try { + new SpearmansCorrelation().correlation(one, two); + fail("Expecting IllegalArgumentException"); + } catch (IllegalArgumentException ex) { + // Expected + } + RealMatrix matrix = new DenseRealMatrix(new double[][] {{0},{1}}); + try { + new SpearmansCorrelation(matrix); + fail("Expecting IllegalArgumentException"); + } catch (IllegalArgumentException ex) { + // Expected + } + } + + public void testConsistency() { + RealMatrix matrix = createRealMatrix(longleyData, 16, 7); + SpearmansCorrelation corrInstance = new SpearmansCorrelation(matrix); + double[][] data = matrix.getData(); + double[] x = matrix.getColumn(0); + double[] y = matrix.getColumn(1); + assertEquals(new SpearmansCorrelation().correlation(x, y), + 
corrInstance.getCorrelationMatrix().getEntry(0, 1), Double.MIN_VALUE); + TestUtils.assertEquals("Correlation matrix", corrInstance.getCorrelationMatrix(), + new SpearmansCorrelation().computeCorrelationMatrix(data), Double.MIN_VALUE); + } + + // Not relevant here + public void testStdErrorConsistency() throws Exception {} + public void testCovarianceConsistency() throws Exception {} + +}