Author: psteitz
Date: Sun May 24 05:04:21 2009
New Revision: 778085

URL: http://svn.apache.org/viewvc?rev=778085&view=rev
Log:
Added support for Spearman's rank correlation.
JIRA: MATH-136
Thanks to John Gant

Added:
    
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java
    
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java
Modified:
    commons/proper/math/trunk/pom.xml
    commons/proper/math/trunk/src/site/xdoc/changes.xml
    commons/proper/math/trunk/src/test/R/correlationTestCases

Modified: commons/proper/math/trunk/pom.xml
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/pom.xml?rev=778085&r1=778084&r2=778085&view=diff
==============================================================================
--- commons/proper/math/trunk/pom.xml (original)
+++ commons/proper/math/trunk/pom.xml Sun May 24 05:04:21 2009
@@ -127,6 +127,9 @@
       <name>Ted Dunning</name>
     </contributor>
     <contributor>
+      <name>John Gant</name>
+    </contributor>
+    <contributor>
       <name>Ken Geis</name>
     </contributor>
     <contributor>

Added: 
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java?rev=778085&view=auto
==============================================================================
--- 
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java
 (added)
+++ 
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java
 Sun May 24 05:04:21 2009
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.correlation;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.linear.DenseRealMatrix;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.stat.ranking.NaturalRanking;
+import org.apache.commons.math.stat.ranking.RankingAlgorithm;
+
+/**
+ * <p>Spearman's rank correlation. This implementation performs a rank
+ * transformation on the input data and then computes {@link PearsonsCorrelation}
+ * on the ranked data.</p>
+ * 
+ * <p>By default, ranks are computed using {@link NaturalRanking} with default
+ * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
+ * The ranking algorithm can be set using a constructor argument.</p>
+ * 
+ * @since 2.0
+ * @version $Revision:$ $Date:$
+ */
+
+public class SpearmansCorrelation {   
+   
+    /** Input data */
+    private final RealMatrix data;
+    
+    /** Ranking algorithm  */
+    private final RankingAlgorithm rankingAlgorithm;
+    
+    /** Rank correlation */
+    private final PearsonsCorrelation rankCorrelation;
+    
+    /**
+     * Create a SpearmansCorrelation with the given input data matrix
+     * and ranking algorithm.
+     * 
+     * @param dataMatrix matrix of data with columns representing
+     * variables to correlate
+     * @param rankingAlgorithm ranking algorithm
+     */    
+    public SpearmansCorrelation(final RealMatrix dataMatrix, final 
RankingAlgorithm rankingAlgorithm) {
+        this.data = dataMatrix.copy(); 
+        this.rankingAlgorithm = rankingAlgorithm;
+        rankTransform(data);
+        rankCorrelation = new PearsonsCorrelation(data);
+    }
+    
+    /**
+     * Create a SpearmansCorrelation from the given data matrix.
+     * 
+     * @param dataMatrix matrix of data with columns representing
+     * variables to correlate
+     */
+    public SpearmansCorrelation(final RealMatrix dataMatrix) {
+        this(dataMatrix, new NaturalRanking());
+    }
+    
+    /**
+     * Create a SpearmansCorrelation without data.
+     */
+    public SpearmansCorrelation() {
+        data = null; 
+        this.rankingAlgorithm = new NaturalRanking();
+        rankCorrelation = null;
+    }
+    
+    /**
+     * Calculate the Spearman Rank Correlation Matrix. 
+     * 
+     * @return Spearman Rank Correlation Matrix
+     */
+    public RealMatrix getCorrelationMatrix() {
+        return rankCorrelation.getCorrelationMatrix();
+    }
+    
+    /**
+     * Returns a {@link PearsonsCorrelation} instance constructed from the
+     * ranked input data. That is,
+     * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
+     * is equivalent to 
+     * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
+     * <code>rankTransform(matrix)</code> is the result of applying the
+     * configured <code>RankingAlgorithm</code> to each of the columns of
+     * <code>matrix.</code>
+     * 
+     * @return PearsonsCorrelation among ranked column data
+     */
+    public PearsonsCorrelation getRankCorrelation() {
+        return rankCorrelation;
+    }
+    
+    /**
+     * Computes the Spearman's rank correlation matrix for the columns of the
+     * input matrix.
+     * 
+     * @param matrix matrix with columns representing variables to correlate
+     * @return correlation matrix
+     */
+    public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
+        RealMatrix matrixCopy = matrix.copy();
+        rankTransform(matrixCopy);
+        return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
+    }
+    
+    /**
+     * Computes the Spearman's rank correlation matrix for the columns of the
+     * input rectangular array.  The columns of the array represent values
+     * of variables to be correlated.
+     * 
+     * @param data matrix with columns representing variables to correlate
+     * @return correlation matrix
+     */
+    public RealMatrix computeCorrelationMatrix(double[][] data) {
+       return computeCorrelationMatrix(new DenseRealMatrix(data));
+    }
+    
+    /**
+     * Computes the Spearman's rank correlation coefficient between the two arrays.
+     * 
+     * <p>Throws IllegalArgumentException if the arrays do not have the same length
+     * or their common length is less than 2</p>
+     *
+     * @param xArray first data array
+     * @param yArray second data array
+     * @return Spearman's rank correlation coefficient for the two arrays
+     * @throws IllegalArgumentException if the array lengths do not match or
+     * there is insufficient data
+     */
+    public double correlation(final double[] xArray, final double[] yArray)
+    throws IllegalArgumentException {
+        if (xArray.length == yArray.length && xArray.length > 1) {
+            return new 
PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray),
+                    rankingAlgorithm.rank(yArray));
+        }
+        else {
+            throw MathRuntimeException.createIllegalArgumentException(
+                    "invalid array dimensions. xArray has size {0}; yArray has 
{1} elements",
+                    xArray.length, yArray.length);
+        }
+    }
+    
+    /**
+     * Applies rank transform to each of the columns of <code>matrix</code>
+     * using the current <code>rankingAlgorithm</code>
+     * 
+     * @param matrix matrix to transform
+     */
+    private void rankTransform(RealMatrix matrix) {
+        for (int i = 0; i < matrix.getColumnDimension(); i++) {
+            matrix.setColumn(i, rankingAlgorithm.rank(matrix.getColumn(i)));
+        }
+    }
+}

Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=778085&r1=778084&r2=778085&view=diff
==============================================================================
--- commons/proper/math/trunk/src/site/xdoc/changes.xml (original)
+++ commons/proper/math/trunk/src/site/xdoc/changes.xml Sun May 24 05:04:21 2009
@@ -39,6 +39,9 @@
   </properties>
   <body>
     <release version="2.0" date="TBD" description="TBD">
+      <action dev="psteitz" type="add" issue="MATH-136" due-to="John Gant">
+        Added Spearman's rank correlation (SpearmansCorrelation).
+      </action>
       <action dev="psteitz" type="add">
         Added support for rank transformations.
       </action>

Modified: commons/proper/math/trunk/src/test/R/correlationTestCases
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/correlationTestCases?rev=778085&r1=778084&r2=778085&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/R/correlationTestCases (original)
+++ commons/proper/math/trunk/src/test/R/correlationTestCases Sun May 24 
05:04:21 2009
@@ -29,11 +29,22 @@
 source("testFunctions")           # utility test functions
 options(digits=16)                   # override number of digits displayed
 
-# function to verify correlation computations
-verifyCorrelation <- function(matrix, expectedCorrelation, name) {
+# Verify Pearson's correlation
+verifyPearsonsCorrelation <- function(matrix, expectedCorrelation, name) {
     correlation <- cor(matrix)
-    output <- c("Correlation matrix test dataset = ", name)
-    if (assertEquals(expectedCorrelation, correlation,tol,"Correlations")) {
+    output <- c("Pearson's Correlation matrix test dataset = ", name)
+    if (assertEquals(expectedCorrelation, correlation,tol,"Pearson's 
Correlations")) {
+        displayPadded(output, SUCCEEDED, WIDTH)
+    } else {
+        displayPadded(output, FAILED, WIDTH)
+    }  
+}
+
+# Verify Spearman's correlation
+verifySpearmansCorrelation <- function(matrix, expectedCorrelation, name) {
+    correlation <- cor(matrix, method="spearman")
+    output <- c("Spearman's Correlation matrix test dataset = ", name)
+    if (assertEquals(expectedCorrelation, correlation,tol,"Spearman's 
Correlations")) {
         displayPadded(output, SUCCEEDED, WIDTH)
     } else {
         displayPadded(output, FAILED, WIDTH)
@@ -94,6 +105,7 @@
                     70551,116.9,554894,4007,2827,130081,1962),
                     nrow = 16, ncol = 7, byrow = TRUE)
 
+# Pearson's
 expectedCorrelation <- matrix(c(
          1.000000000000000, 0.9708985250610560, 0.9835516111796693, 
0.5024980838759942,
          0.4573073999764817, 0.960390571594376, 0.9713294591921188,
@@ -110,7 +122,7 @@
           0.971329459192119, 0.9911491900672053, 0.9952734837647849, 
0.6682566045621746,
           0.4172451498349454, 0.993952846232926, 1.0000000000000000),
           nrow = 7, ncol = 7, byrow = TRUE)
- verifyCorrelation(longley, expectedCorrelation, "longley")
+ verifyPearsonsCorrelation(longley, expectedCorrelation, "longley")
  
  expectedPValues <- c(
           4.38904690369668e-10,
@@ -121,6 +133,19 @@
           3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 
0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15)
  verifyPValues(longley, expectedPValues, "longley")
  
+ # Spearman's
+expectedCorrelation <- matrix(c(
+          1, 0.982352941176471, 0.985294117647059, 0.564705882352941, 
0.2264705882352941, 0.976470588235294,
+          0.976470588235294, 0.982352941176471, 1, 0.997058823529412, 
0.664705882352941, 0.2205882352941176,
+          0.997058823529412, 0.997058823529412, 0.985294117647059, 
0.997058823529412, 1, 0.638235294117647,
+          0.2235294117647059, 0.9941176470588236, 0.9941176470588236, 
0.564705882352941, 0.664705882352941,
+          0.638235294117647, 1, -0.3411764705882353, 0.685294117647059, 
0.685294117647059, 0.2264705882352941,
+          0.2205882352941176, 0.2235294117647059, -0.3411764705882353, 1, 
0.2264705882352941, 0.2264705882352941,
+          0.976470588235294, 0.997058823529412, 0.9941176470588236, 
0.685294117647059, 0.2264705882352941, 1, 1,
+          0.976470588235294, 0.997058823529412, 0.9941176470588236, 
0.685294117647059, 0.2264705882352941, 1, 1),
+          nrow = 7, ncol = 7, byrow = TRUE)
+ verifySpearmansCorrelation(longley, expectedCorrelation, "longley")
+  
  # Swiss Fertility
  
  fertility <- matrix(c(80.2,17.0,15,12,9.96,
@@ -171,15 +196,14 @@
   44.7,46.6,16,29,50.43,
   42.8,27.7,22,29,58.33),
   nrow = 47, ncol = 5, byrow = TRUE)
-   
-expectedCorrelation <- matrix(c(
-         1.0000000000000000, 0.3530791836199747, -0.6458827064572875, 
-0.6637888570350691,  0.4636847006517939,
-         0.3530791836199747, 1.0000000000000000,-0.6865422086171366, 
-0.6395225189483201, 0.4010950530487398,
-         -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 
0.6984152962884830, -0.5727418060641666,
-         -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 
1.0000000000000000, -0.1538589170909148,
-          0.4636847006517939, 0.4010950530487398, -0.5727418060641666, 
-0.1538589170909148, 1.0000000000000000),
+  expectedCorrelation <- matrix(c(
+          1, 0.3530791836199747, -0.6458827064572875, -0.663788857035069, 
0.463684700651794,
+          0.3530791836199747, 1, -0.6865422086171366, -0.63952251894832, 
0.4010950530487398,
+         -0.6458827064572875, -0.6865422086171366, 1, 0.698415296288483, 
-0.572741806064167,
+         -0.663788857035069, -0.63952251894832, 0.698415296288483, 1, 
-0.1538589170909148,
+          0.463684700651794, 0.4010950530487398, -0.572741806064167, 
-0.1538589170909148, 1),
           nrow = 5, ncol = 5, byrow = TRUE)
-verifyCorrelation(fertility, expectedCorrelation, "swiss fertility")
+verifyPearsonsCorrelation(fertility, expectedCorrelation, "swiss fertility")
 
 expectedPValues <- c(
           0.01491720061472623,
@@ -188,4 +212,14 @@
           0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 
0.301807756132683)
 verifyPValues(fertility, expectedPValues, "swiss fertility")
 
+# Spearman's
+expectedCorrelation <- matrix(c(
+           1, 0.2426642769364176, -0.660902996352354, -0.443257690360988, 
0.4136455623012432,
+           0.2426642769364176, 1, -0.598859938748963, -0.650463814145816, 
0.2886878090882852,
+          -0.660902996352354, -0.598859938748963, 1, 0.674603831406147, 
-0.4750575257171745,
+          -0.443257690360988, -0.650463814145816, 0.674603831406147, 1, 
-0.1444163088302244,
+           0.4136455623012432, 0.2886878090882852, -0.4750575257171745, 
-0.1444163088302244, 1),
+          nrow = 5, ncol = 5, byrow = TRUE)
+ verifySpearmansCorrelation(fertility, expectedCorrelation, "swiss fertility")
+
 displayDashes(WIDTH)

Added: 
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java?rev=778085&view=auto
==============================================================================
--- 
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java
 (added)
+++ 
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/SpearmansRankCorrelationTest.java
 Sun May 24 05:04:21 2009
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.correlation;
+
+import org.apache.commons.math.TestUtils;
+import org.apache.commons.math.linear.DenseRealMatrix;
+import org.apache.commons.math.linear.RealMatrix;
+
+/**
+ * Test cases for Spearman's rank correlation
+ * 
+ * @since 2.0
+ * @version $Revision:$ $Date:$
+ */
+public class SpearmansRankCorrelationTest extends PearsonsCorrelationTest {
+
+    protected void setUp() throws Exception {
+        super.setUp();
+    }
+
+    protected void tearDown() throws Exception {
+        super.tearDown();
+    }
+    
+    /**
+     * Test Longley dataset against R.
+     */
+    public void testLongly() throws Exception {  
+        RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
+        SpearmansCorrelation corrInstance = new SpearmansCorrelation(matrix); 
+        RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
+        double[] rData = new double[] {
+                1, 0.982352941176471, 0.985294117647059, 0.564705882352941, 
0.2264705882352941, 0.976470588235294,
+                0.976470588235294, 0.982352941176471, 1, 0.997058823529412, 
0.664705882352941, 0.2205882352941176,
+                0.997058823529412, 0.997058823529412, 0.985294117647059, 
0.997058823529412, 1, 0.638235294117647,
+                0.2235294117647059, 0.9941176470588236, 0.9941176470588236, 
0.564705882352941, 0.664705882352941,
+                0.638235294117647, 1, -0.3411764705882353, 0.685294117647059, 
0.685294117647059, 0.2264705882352941,
+                0.2205882352941176, 0.2235294117647059, -0.3411764705882353, 
1, 0.2264705882352941, 0.2264705882352941,
+                0.976470588235294, 0.997058823529412, 0.9941176470588236, 
0.685294117647059, 0.2264705882352941, 1, 1,
+                0.976470588235294, 0.997058823529412, 0.9941176470588236, 
0.685294117647059, 0.2264705882352941, 1, 1
+        }; 
+        TestUtils.assertEquals("Spearman's correlation matrix", 
createRealMatrix(rData, 7, 7), correlationMatrix, 10E-15);
+    }
+    
+    /**
+     * Test R swiss fertility dataset.
+     */
+    public void testSwiss() throws Exception {  
+        RealMatrix matrix = createRealMatrix(swissData, 47, 5);
+        SpearmansCorrelation corrInstance = new SpearmansCorrelation(matrix); 
+        RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
+        double[] rData = new double[] {
+                1, 0.2426642769364176, -0.660902996352354, -0.443257690360988, 
0.4136455623012432,
+                0.2426642769364176, 1, -0.598859938748963, -0.650463814145816, 
0.2886878090882852,
+               -0.660902996352354, -0.598859938748963, 1, 0.674603831406147, 
-0.4750575257171745,
+               -0.443257690360988, -0.650463814145816, 0.674603831406147, 1, 
-0.1444163088302244,
+                0.4136455623012432, 0.2886878090882852, -0.4750575257171745, 
-0.1444163088302244, 1
+        }; 
+        TestUtils.assertEquals("Spearman's correlation matrix", 
createRealMatrix(rData, 5, 5), correlationMatrix, 10E-15);
+    }
+    
+    /**
+     * Constant column
+     */
+    public void testConstant() {
+        double[] noVariance = new double[] {1, 1, 1, 1};
+        double[] values = new double[] {1, 2, 3, 4};
+        assertTrue(Double.isNaN(new 
SpearmansCorrelation().correlation(noVariance, values)));
+    }
+    
+    /**
+     * Insufficient data
+     */ 
+    public void testInsufficientData() {
+        double[] one = new double[] {1};
+        double[] two = new double[] {2};
+        try {
+            new SpearmansCorrelation().correlation(one, two);
+            fail("Expecting IllegalArgumentException");
+        } catch (IllegalArgumentException ex) {
+            // Expected
+        }
+        RealMatrix matrix = new DenseRealMatrix(new double[][] {{0},{1}});
+        try {
+            new SpearmansCorrelation(matrix);
+            fail("Expecting IllegalArgumentException");
+        } catch (IllegalArgumentException ex) {
+            // Expected
+        }
+    }
+    
+    public void testConsistency() {
+        RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
+        SpearmansCorrelation corrInstance = new SpearmansCorrelation(matrix); 
+        double[][] data = matrix.getData();
+        double[] x = matrix.getColumn(0);
+        double[] y = matrix.getColumn(1);
+        assertEquals(new SpearmansCorrelation().correlation(x, y), 
+                corrInstance.getCorrelationMatrix().getEntry(0, 1), 
Double.MIN_VALUE);
+        TestUtils.assertEquals("Correlation matrix", 
corrInstance.getCorrelationMatrix(),
+                new SpearmansCorrelation().computeCorrelationMatrix(data), 
Double.MIN_VALUE);
+    }
+    
+    // Not relevant here
+    public void testStdErrorConsistency() throws Exception {}
+    public void testCovarianceConsistency() throws Exception {}
+     
+}


Reply via email to