Author: psteitz
Date: Sun May 24 05:04:21 2009
New Revision: 778085
URL: http://svn.apache.org/viewvc?rev=778085&view=rev
Log:
Added support for Spearman's rank correlation.
JIRA: MATH-136
Thanks to John Gant
Added:
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java
Modified:
commons/proper/math/trunk/pom.xml
commons/proper/math/trunk/src/site/xdoc/changes.xml
commons/proper/math/trunk/src/test/R/correlationTestCases
Modified: commons/proper/math/trunk/pom.xml
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/pom.xml?rev=778085&r1=778084&r2=778085&view=diff
==============================================================================
--- commons/proper/math/trunk/pom.xml (original)
+++ commons/proper/math/trunk/pom.xml Sun May 24 05:04:21 2009
@@ -127,6 +127,9 @@
<name>Ted Dunning</name>
</contributor>
<contributor>
+ <name>John Gant</name>
+ </contributor>
+ <contributor>
<name>Ken Geis</name>
</contributor>
<contributor>
Added:
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java?rev=778085&view=auto
==============================================================================
---
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java
(added)
+++
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java
Sun May 24 05:04:21 2009
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.correlation;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.linear.DenseRealMatrix;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.stat.ranking.NaturalRanking;
+import org.apache.commons.math.stat.ranking.RankingAlgorithm;
+
+/**
+ * <p>Spearman's rank correlation. This implementation performs a rank
+ * transformation on the input data and then computes {@link PearsonsCorrelation}
+ * on the ranked data.</p>
+ *
+ * <p>By default, ranks are computed using {@link NaturalRanking} with default
+ * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
+ * The ranking algorithm can be set using a constructor argument.</p>
+ *
+ * <p>Instances created with the no-argument constructor hold no data and no
+ * rank correlation; they are only usable through
+ * <code>computeCorrelationMatrix</code> and <code>correlation</code>.</p>
+ *
+ * @since 2.0
+ * @version $Revision:$ $Date:$
+ */
+
+public class SpearmansCorrelation {
+
+ /**
+ * Copy of the input data. Its columns are replaced by their ranks during
+ * construction. <code>null</code> when the instance was created without data.
+ */
+ private final RealMatrix data;
+
+ /** Ranking algorithm used to turn raw values into ranks */
+ private final RankingAlgorithm rankingAlgorithm;
+
+ /**
+ * Pearson's correlation computed on the ranked data.
+ * <code>null</code> when the instance was created without data.
+ */
+ private final PearsonsCorrelation rankCorrelation;
+
+ /**
+ * Create a SpearmansCorrelation with the given input data matrix
+ * and ranking algorithm. The matrix is copied, each column of the copy is
+ * replaced by its ranks, and a {@link PearsonsCorrelation} is built from
+ * the ranked copy.
+ *
+ * @param dataMatrix matrix of data with columns representing
+ * variables to correlate
+ * @param rankingAlgorithm ranking algorithm
+ */
+ public SpearmansCorrelation(final RealMatrix dataMatrix, final
RankingAlgorithm rankingAlgorithm) {
+ this.data = dataMatrix.copy(); // the rank transform below is applied to this copy
+ this.rankingAlgorithm = rankingAlgorithm; // must be set before rankTransform, which reads this field
+ rankTransform(data);
+ rankCorrelation = new PearsonsCorrelation(data);
+ }
+
+ /**
+ * Create a SpearmansCorrelation from the given data matrix, ranking the
+ * data with a default-constructed {@link NaturalRanking}.
+ *
+ * @param dataMatrix matrix of data with columns representing
+ * variables to correlate
+ */
+ public SpearmansCorrelation(final RealMatrix dataMatrix) {
+ this(dataMatrix, new NaturalRanking());
+ }
+
+ /**
+ * Create a SpearmansCorrelation without data. The ranking algorithm is a
+ * default-constructed {@link NaturalRanking}; the stored data and the
+ * stored rank correlation are both <code>null</code>.
+ */
+ public SpearmansCorrelation() {
+ data = null;
+ this.rankingAlgorithm = new NaturalRanking();
+ rankCorrelation = null; // no data, so there is nothing to correlate yet
+ }
+
+ /**
+ * Calculate the Spearman Rank Correlation Matrix, i.e. the correlation
+ * matrix of the {@link PearsonsCorrelation} built from the ranked data.
+ *
+ * <p>On an instance created with the no-argument constructor the stored
+ * rank correlation is <code>null</code>, so this call fails with a
+ * NullPointerException; use <code>computeCorrelationMatrix</code> there.</p>
+ *
+ * @return Spearman Rank Correlation Matrix
+ */
+ public RealMatrix getCorrelationMatrix() {
+ return rankCorrelation.getCorrelationMatrix();
+ }
+
+ /**
+ * Returns a {@link PearsonsCorrelation} instance constructed from the
+ * ranked input data. That is,
+ * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
+ * is equivalent to
+ * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
+ * <code>rankTransform(matrix)</code> is the result of applying the
+ * configured <code>RankingAlgorithm</code> to each of the columns of
+ * <code>matrix.</code>
+ *
+ * @return PearsonsCorrelation among ranked column data, or
+ * <code>null</code> if this instance was created without data
+ */
+ public PearsonsCorrelation getRankCorrelation() {
+ return rankCorrelation;
+ }
+
+ /**
+ * Computes the Spearman's rank correlation matrix for the columns of the
+ * input matrix. A copy of the input is rank-transformed with this
+ * instance's ranking algorithm and handed to a new
+ * {@link PearsonsCorrelation}; the data stored in this instance is not used.
+ *
+ * @param matrix matrix with columns representing variables to correlate
+ * @return correlation matrix
+ */
+ public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
+ RealMatrix matrixCopy = matrix.copy();
+ rankTransform(matrixCopy);
+ return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
+ }
+
+ /**
+ * Computes the Spearman's rank correlation matrix for the columns of the
+ * input rectangular array. The columns of the array represent values
+ * of variables to be correlated. The array is wrapped in a
+ * <code>DenseRealMatrix</code> and passed to
+ * <code>computeCorrelationMatrix(RealMatrix)</code>.
+ *
+ * @param data matrix with columns representing variables to correlate
+ * (this parameter shadows the <code>data</code> field)
+ * @return correlation matrix
+ */
+ public RealMatrix computeCorrelationMatrix(double[][] data) {
+ return computeCorrelationMatrix(new DenseRealMatrix(data));
+ }
+
+ /**
+ * Computes the Spearman's rank correlation coefficient between the two arrays.
+ * Both arrays are ranked with this instance's ranking algorithm and the
+ * ranks are passed to a new {@link PearsonsCorrelation}.
+ *
+ * <p>Throws IllegalArgumentException if the arrays do not have the same length
+ * or their common length is less than 2</p>
+ *
+ * @param xArray first data array
+ * @param yArray second data array
+ * @return Returns Spearman's rank correlation coefficient for the two arrays
+ * @throws IllegalArgumentException if the arrays lengths do not match or
+ * there is insufficient data
+ */
+ public double correlation(final double[] xArray, final double[] yArray)
+ throws IllegalArgumentException {
+ if (xArray.length == yArray.length && xArray.length > 1) {
+ return new
PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray),
+ rankingAlgorithm.rank(yArray));
+ }
+ else {
+ throw MathRuntimeException.createIllegalArgumentException(
+ "invalid array dimensions. xArray has size {0}; yArray has
{1} elements",
+ xArray.length, yArray.length);
+ }
+ }
+
+ /**
+ * Applies rank transform to each of the columns of <code>matrix</code>
+ * using the current <code>rankingAlgorithm</code>. The transformation is
+ * done in place: every column is overwritten with its ranks.
+ *
+ * @param matrix matrix to transform
+ */
+ private void rankTransform(RealMatrix matrix) {
+ for (int i = 0; i < matrix.getColumnDimension(); i++) {
+ matrix.setColumn(i, rankingAlgorithm.rank(matrix.getColumn(i)));
+ }
+ }
+}
Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=778085&r1=778084&r2=778085&view=diff
==============================================================================
--- commons/proper/math/trunk/src/site/xdoc/changes.xml (original)
+++ commons/proper/math/trunk/src/site/xdoc/changes.xml Sun May 24 05:04:21 2009
@@ -39,6 +39,9 @@
</properties>
<body>
<release version="2.0" date="TBD" description="TBD">
+ <action dev="psteitz" type="add" issue="MATH-136" due-to="John Gant">
+ Added Spearman's rank correlation (SpearmansCorrelation).
+ </action>
<action dev="psteitz" type="add">
Added support for rank transformations.
</action>
Modified: commons/proper/math/trunk/src/test/R/correlationTestCases
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/correlationTestCases?rev=778085&r1=778084&r2=778085&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/R/correlationTestCases (original)
+++ commons/proper/math/trunk/src/test/R/correlationTestCases Sun May 24
05:04:21 2009
@@ -29,11 +29,22 @@
source("testFunctions") # utility test functions
options(digits=16) # override number of digits displayed
-# function to verify correlation computations
-verifyCorrelation <- function(matrix, expectedCorrelation, name) {
+# Verify Pearson's correlation
+verifyPearsonsCorrelation <- function(matrix, expectedCorrelation, name) {
correlation <- cor(matrix)
- output <- c("Correlation matrix test dataset = ", name)
- if (assertEquals(expectedCorrelation, correlation,tol,"Correlations")) {
+ output <- c("Pearson's Correlation matrix test dataset = ", name)
+ if (assertEquals(expectedCorrelation, correlation,tol,"Pearson's
Correlations")) {
+ displayPadded(output, SUCCEEDED, WIDTH)
+ } else {
+ displayPadded(output, FAILED, WIDTH)
+ }
+}
+
+# Verify Spearman's correlation
+verifySpearmansCorrelation <- function(matrix, expectedCorrelation, name) {
+ correlation <- cor(matrix, method="spearman")
+ output <- c("Spearman's Correlation matrix test dataset = ", name)
+ if (assertEquals(expectedCorrelation, correlation,tol,"Spearman's
Correlations")) {
displayPadded(output, SUCCEEDED, WIDTH)
} else {
displayPadded(output, FAILED, WIDTH)
@@ -94,6 +105,7 @@
70551,116.9,554894,4007,2827,130081,1962),
nrow = 16, ncol = 7, byrow = TRUE)
+# Pearson's
expectedCorrelation <- matrix(c(
1.000000000000000, 0.9708985250610560, 0.9835516111796693,
0.5024980838759942,
0.4573073999764817, 0.960390571594376, 0.9713294591921188,
@@ -110,7 +122,7 @@
0.971329459192119, 0.9911491900672053, 0.9952734837647849,
0.6682566045621746,
0.4172451498349454, 0.993952846232926, 1.0000000000000000),
nrow = 7, ncol = 7, byrow = TRUE)
- verifyCorrelation(longley, expectedCorrelation, "longley")
+ verifyPearsonsCorrelation(longley, expectedCorrelation, "longley")
expectedPValues <- c(
4.38904690369668e-10,
@@ -121,6 +133,19 @@
3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15,
0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15)
verifyPValues(longley, expectedPValues, "longley")
+ # Spearman's
+expectedCorrelation <- matrix(c(
+ 1, 0.982352941176471, 0.985294117647059, 0.564705882352941,
0.2264705882352941, 0.976470588235294,
+ 0.976470588235294, 0.982352941176471, 1, 0.997058823529412,
0.664705882352941, 0.2205882352941176,
+ 0.997058823529412, 0.997058823529412, 0.985294117647059,
0.997058823529412, 1, 0.638235294117647,
+ 0.2235294117647059, 0.9941176470588236, 0.9941176470588236,
0.564705882352941, 0.664705882352941,
+ 0.638235294117647, 1, -0.3411764705882353, 0.685294117647059,
0.685294117647059, 0.2264705882352941,
+ 0.2205882352941176, 0.2235294117647059, -0.3411764705882353, 1,
0.2264705882352941, 0.2264705882352941,
+ 0.976470588235294, 0.997058823529412, 0.9941176470588236,
0.685294117647059, 0.2264705882352941, 1, 1,
+ 0.976470588235294, 0.997058823529412, 0.9941176470588236,
0.685294117647059, 0.2264705882352941, 1, 1),
+ nrow = 7, ncol = 7, byrow = TRUE)
+ verifySpearmansCorrelation(longley, expectedCorrelation, "longley")
+
# Swiss Fertility
fertility <- matrix(c(80.2,17.0,15,12,9.96,
@@ -171,15 +196,14 @@
44.7,46.6,16,29,50.43,
42.8,27.7,22,29,58.33),
nrow = 47, ncol = 5, byrow = TRUE)
-
-expectedCorrelation <- matrix(c(
- 1.0000000000000000, 0.3530791836199747, -0.6458827064572875,
-0.6637888570350691, 0.4636847006517939,
- 0.3530791836199747, 1.0000000000000000,-0.6865422086171366,
-0.6395225189483201, 0.4010950530487398,
- -0.6458827064572875, -0.6865422086171366, 1.0000000000000000,
0.6984152962884830, -0.5727418060641666,
- -0.6637888570350691, -0.6395225189483201, 0.6984152962884830,
1.0000000000000000, -0.1538589170909148,
- 0.4636847006517939, 0.4010950530487398, -0.5727418060641666,
-0.1538589170909148, 1.0000000000000000),
+ expectedCorrelation <- matrix(c(
+ 1, 0.3530791836199747, -0.6458827064572875, -0.663788857035069,
0.463684700651794,
+ 0.3530791836199747, 1, -0.6865422086171366, -0.63952251894832,
0.4010950530487398,
+ -0.6458827064572875, -0.6865422086171366, 1, 0.698415296288483,
-0.572741806064167,
+ -0.663788857035069, -0.63952251894832, 0.698415296288483, 1,
-0.1538589170909148,
+ 0.463684700651794, 0.4010950530487398, -0.572741806064167,
-0.1538589170909148, 1),
nrow = 5, ncol = 5, byrow = TRUE)
-verifyCorrelation(fertility, expectedCorrelation, "swiss fertility")
+verifyPearsonsCorrelation(fertility, expectedCorrelation, "swiss fertility")
expectedPValues <- c(
0.01491720061472623,
@@ -188,4 +212,14 @@
0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05,
0.301807756132683)
verifyPValues(fertility, expectedPValues, "swiss fertility")
+# Spearman's
+expectedCorrelation <- matrix(c(
+ 1, 0.2426642769364176, -0.660902996352354, -0.443257690360988,
0.4136455623012432,
+ 0.2426642769364176, 1, -0.598859938748963, -0.650463814145816,
0.2886878090882852,
+ -0.660902996352354, -0.598859938748963, 1, 0.674603831406147,
-0.4750575257171745,
+ -0.443257690360988, -0.650463814145816, 0.674603831406147, 1,
-0.1444163088302244,
+ 0.4136455623012432, 0.2886878090882852, -0.4750575257171745,
-0.1444163088302244, 1),
+ nrow = 5, ncol = 5, byrow = TRUE)
+ verifySpearmansCorrelation(fertility, expectedCorrelation, "swiss fertility")
+
displayDashes(WIDTH)
Added:
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java?rev=778085&view=auto
==============================================================================
---
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java
(added)
+++
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java
Sun May 24 05:04:21 2009
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.correlation;
+
+import org.apache.commons.math.TestUtils;
+import org.apache.commons.math.linear.DenseRealMatrix;
+import org.apache.commons.math.linear.RealMatrix;
+
+/**
+ * Test cases for Spearman's rank correlation.
+ *
+ * <p>Extends <code>PearsonsCorrelationTest</code>. The helpers and fixtures used
+ * below (<code>createRealMatrix</code>, <code>longleyData</code>,
+ * <code>swissData</code>) are not defined in this class and are presumably
+ * inherited from that parent. Expected matrices are the output of R's
+ * <code>cor(matrix, method="spearman")</code>.</p>
+ *
+ * @since 2.0
+ * @version $Revision:$ $Date:$
+ */
+public class SpearmansRankCorrelationTest extends PearsonsCorrelationTest {
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ }
+
+ protected void tearDown() throws Exception {
+ super.tearDown();
+ }
+
+ /**
+ * Test Longley dataset against R: the 7x7 Spearman correlation matrix of
+ * the 16x7 Longley data must match the R reference values in
+ * <code>rData</code> (listed row by row).
+ */
+ public void testLongly() throws Exception {
+ RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
+ SpearmansCorrelation corrInstance = new SpearmansCorrelation(matrix);
+ RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
+ double[] rData = new double[] {
+ 1, 0.982352941176471, 0.985294117647059, 0.564705882352941,
0.2264705882352941, 0.976470588235294,
+ 0.976470588235294, 0.982352941176471, 1, 0.997058823529412,
0.664705882352941, 0.2205882352941176,
+ 0.997058823529412, 0.997058823529412, 0.985294117647059,
0.997058823529412, 1, 0.638235294117647,
+ 0.2235294117647059, 0.9941176470588236, 0.9941176470588236,
0.564705882352941, 0.664705882352941,
+ 0.638235294117647, 1, -0.3411764705882353, 0.685294117647059,
0.685294117647059, 0.2264705882352941,
+ 0.2205882352941176, 0.2235294117647059, -0.3411764705882353,
1, 0.2264705882352941, 0.2264705882352941,
+ 0.976470588235294, 0.997058823529412, 0.9941176470588236,
0.685294117647059, 0.2264705882352941, 1, 1,
+ 0.976470588235294, 0.997058823529412, 0.9941176470588236,
0.685294117647059, 0.2264705882352941, 1, 1
+ };
+ TestUtils.assertEquals("Spearman's correlation matrix",
createRealMatrix(rData, 7, 7), correlationMatrix, 10E-15); // tolerance 10E-15, i.e. 1e-14
+ }
+
+ /**
+ * Test R swiss fertility dataset: the 5x5 Spearman correlation matrix of
+ * the 47x5 data must match the R reference values in <code>rData</code>.
+ */
+ public void testSwiss() throws Exception {
+ RealMatrix matrix = createRealMatrix(swissData, 47, 5);
+ SpearmansCorrelation corrInstance = new SpearmansCorrelation(matrix);
+ RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
+ double[] rData = new double[] {
+ 1, 0.2426642769364176, -0.660902996352354, -0.443257690360988,
0.4136455623012432,
+ 0.2426642769364176, 1, -0.598859938748963, -0.650463814145816,
0.2886878090882852,
+ -0.660902996352354, -0.598859938748963, 1, 0.674603831406147,
-0.4750575257171745,
+ -0.443257690360988, -0.650463814145816, 0.674603831406147, 1,
-0.1444163088302244,
+ 0.4136455623012432, 0.2886878090882852, -0.4750575257171745,
-0.1444163088302244, 1
+ };
+ TestUtils.assertEquals("Spearman's correlation matrix",
createRealMatrix(rData, 5, 5), correlationMatrix, 10E-15); // tolerance 10E-15, i.e. 1e-14
+ }
+
+ /**
+ * Constant column: correlating an array whose entries are all equal
+ * with a varying array is expected to yield NaN.
+ */
+ public void testConstant() {
+ double[] noVariance = new double[] {1, 1, 1, 1};
+ double[] values = new double[] {1, 2, 3, 4};
+ assertTrue(Double.isNaN(new
SpearmansCorrelation().correlation(noVariance, values)));
+ }
+
+ /**
+ * Insufficient data: both a pair of one-element arrays and a matrix with
+ * a single column must be rejected with an IllegalArgumentException.
+ */
+ public void testInsufficientData() {
+ double[] one = new double[] {1};
+ double[] two = new double[] {2};
+ try {
+ new SpearmansCorrelation().correlation(one, two);
+ fail("Expecting IllegalArgumentException");
+ } catch (IllegalArgumentException ex) {
+ // Expected
+ }
+ RealMatrix matrix = new DenseRealMatrix(new double[][] {{0},{1}}); // two rows, one column
+ try {
+ new SpearmansCorrelation(matrix);
+ fail("Expecting IllegalArgumentException");
+ } catch (IllegalArgumentException ex) {
+ // Expected
+ }
+ }
+
+ public void testConsistency() { // the different ways of computing the correlation must agree
+ RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
+ SpearmansCorrelation corrInstance = new SpearmansCorrelation(matrix);
+ double[][] data = matrix.getData();
+ double[] x = matrix.getColumn(0);
+ double[] y = matrix.getColumn(1);
+ assertEquals(new SpearmansCorrelation().correlation(x, y),
+ corrInstance.getCorrelationMatrix().getEntry(0, 1),
Double.MIN_VALUE); // tolerance is the smallest positive double
+ TestUtils.assertEquals("Correlation matrix",
corrInstance.getCorrelationMatrix(),
+ new SpearmansCorrelation().computeCorrelationMatrix(data),
Double.MIN_VALUE); // tolerance is the smallest positive double
+ }
+
+ // Not relevant here: declared with empty bodies, presumably to override and
+ // disable the same-named tests inherited from PearsonsCorrelationTest
+ public void testStdErrorConsistency() throws Exception {}
+ public void testCovarianceConsistency() throws Exception {}
+
+}