Author: psteitz
Date: Tue Jan  8 21:16:00 2008
New Revision: 610274

URL: http://svn.apache.org/viewvc?rev=610274&view=rev
Log:
Added check and rescaling of expected counts to sum to sum of observed
counts if necessary in ChiSquare test.
JIRA: MATH-175
Reported and patched by Carl Anderson.

Modified:
    
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java
    commons/proper/math/trunk/src/test/R/chiSquareTestCases
    
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/ChiSquareTestTest.java
    
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/TestUtilsTest.java
    commons/proper/math/trunk/xdocs/changes.xml

Modified: 
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java?rev=610274&r1=610273&r2=610274&view=diff
==============================================================================
--- 
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java
 (original)
+++ 
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java
 Tue Jan  8 21:16:00 2008
@@ -50,6 +50,11 @@
         setDistribution(x);
     }
      /**
+     * {@inheritDoc}
+     * <p><strong>Note: </strong>This implementation rescales the 
+     * <code>expected</code> array if necessary to ensure that the sum of the
+     * expected and observed counts are equal.</p>
+     * 
      * @param observed array of observed frequency counts
      * @param expected array of expected frequency counts
      * @return chi-square test statistic
@@ -58,8 +63,6 @@
      */
     public double chiSquare(double[] expected, long[] observed)
         throws IllegalArgumentException {
-        double sumSq = 0.0d;
-        double dev = 0.0d;
         if ((expected.length < 2) || (expected.length != observed.length)) {
             throw new IllegalArgumentException(
                     "observed, expected array lengths incorrect");
@@ -68,14 +71,38 @@
             throw new IllegalArgumentException(
                 "observed counts must be non-negative and expected counts must 
be postive");
         }
+        double sumExpected = 0d;
+        double sumObserved = 0d;
+        for (int i = 0; i < observed.length; i++) {
+            sumExpected += expected[i];
+            sumObserved += observed[i];
+        }
+        double ratio = 1.0d;
+        boolean rescale = false;
+        if (Math.abs(sumExpected - sumObserved) > 10E-6) {
+            ratio = sumObserved / sumExpected;
+            rescale = true;
+        }
+        double sumSq = 0.0d;
+        double dev = 0.0d;
         for (int i = 0; i < observed.length; i++) {
-            dev = ((double) observed[i] - expected[i]);
-            sumSq += dev * dev / expected[i];
+            if (rescale) {
+                dev = ((double) observed[i] - ratio * expected[i]);
+                sumSq += dev * dev / (ratio * expected[i]);
+            } else {
+                dev = ((double) observed[i] - expected[i]);
+                sumSq += dev * dev / expected[i];
+            }
         }
         return sumSq;
     }
 
     /**
+     * {@inheritDoc}
+     * <p><strong>Note: </strong>This implementation rescales the 
+     * <code>expected</code> array if necessary to ensure that the sum of the
+     * expected and observed counts are equal.</p>
+     * 
      * @param observed array of observed frequency counts
      * @param expected array of exptected frequency counts
      * @return p-value
@@ -90,6 +117,11 @@
     }
 
     /**
+     * {@inheritDoc}
+     * <p><strong>Note: </strong>This implementation rescales the 
+     * <code>expected</code> array if necessary to ensure that the sum of the
+     * expected and observed counts are equal.</p>
+     * 
      * @param observed array of observed frequency counts
      * @param expected array of exptected frequency counts
      * @param alpha significance level of the test

Modified: commons/proper/math/trunk/src/test/R/chiSquareTestCases
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/chiSquareTestCases?rev=610274&r1=610273&r2=610274&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/R/chiSquareTestCases (original)
+++ commons/proper/math/trunk/src/test/R/chiSquareTestCases Tue Jan  8 21:16:00 
2008
@@ -49,8 +49,9 @@
 
 verifyHomogeneity <- function(obs, exp, expectedP, expectedStat, 
   tol, desc) {
-    chi <- sum((obs - exp)^2/exp)
-    p <- 1 - pchisq(sum((obs - exp)^2/exp), length(obs) - 1)
+    results <- chisq.test(obs,p=exp,rescale.p=TRUE)
+    chi <- results$statistic
+    p <- results$p.value
     if (assertEquals(expectedP, p, tol, "p-value")) {
         displayPadded(c(desc, " p-value test"), SUCCEEDED, WIDTH)
     } else {
@@ -73,14 +74,14 @@
 
 observed <- c(500, 623, 72, 70, 31)
 expected <- c(485, 541, 82, 61, 37)
-verifyHomogeneity(observed, expected,  0.002512096, 16.4131070362, tol,
-   "testChiSquare2")
+verifyHomogeneity(observed, expected, 0.06051952647453607, 9.023307936427388,
+   tol, "testChiSquare2")
 
 observed <- c(2372383, 584222, 257170, 17750155, 7903832, 489265,
               209628, 393899)
 expected <- c(3389119.5, 649136.6, 285745.4, 25357364.76, 11291189.78,
               543628.0, 232921.0, 437665.75)
-verifyHomogeneity(observed, expected, 0, 3624883.342907764, tol,
+verifyHomogeneity(observed, expected, 0, 114875.90421929007, tol,
    "testChiSquareLargeTestStatistic")
 
 counts <- matrix(c(40, 22, 43, 91, 21, 28, 60, 10, 22), nc = 3);

Modified: 
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/ChiSquareTestTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/ChiSquareTestTest.java?rev=610274&r1=610273&r2=610274&view=diff
==============================================================================
--- 
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/ChiSquareTestTest.java
 (original)
+++ 
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/ChiSquareTestTest.java
 Tue Jan  8 21:16:00 2008
@@ -57,10 +57,10 @@
         
         long[] observed1 = { 500, 623, 72, 70, 31 };
         double[] expected1 = { 485, 541, 82, 61, 37 };
-        assertEquals( "chi-square test statistic", 16.4131070362, 
testStatistic.chiSquare(expected1, observed1), 1E-10);
-        assertEquals("chi-square p-value", 0.002512096, 
testStatistic.chiSquareTest(expected1, observed1), 1E-9);
-        assertTrue("chi-square test reject", 
testStatistic.chiSquareTest(expected1, observed1, 0.003));
-        assertTrue("chi-square test accept", 
!testStatistic.chiSquareTest(expected1, observed1, 0.002));
+        assertEquals( "chi-square test statistic", 9.023307936427388, 
testStatistic.chiSquare(expected1, observed1), 1E-10);
+        assertEquals("chi-square p-value", 0.06051952647453607, 
testStatistic.chiSquareTest(expected1, observed1), 1E-9);
+        assertTrue("chi-square test reject", 
testStatistic.chiSquareTest(expected1, observed1, 0.08));
+        assertTrue("chi-square test accept", 
!testStatistic.chiSquareTest(expected1, observed1, 0.05));
 
         try {
             testStatistic.chiSquareTest(expected1, observed1, 95);
@@ -181,7 +181,7 @@
         double cst = csti.chiSquareTest(exp, obs); 
         assertEquals("chi-square p-value", 0.0, cst, 1E-3);
         assertEquals( "chi-square test statistic", 
-                3624883.342907764, testStatistic.chiSquare(exp, obs), 1E-9);
+                114875.90421929007, testStatistic.chiSquare(exp, obs), 1E-9);
     }
     
     /** Contingency table containing zeros - PR # 32531 */

Modified: 
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/TestUtilsTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/TestUtilsTest.java?rev=610274&r1=610273&r2=610274&view=diff
==============================================================================
--- 
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/TestUtilsTest.java
 (original)
+++ 
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/TestUtilsTest.java
 Tue Jan  8 21:16:00 2008
@@ -55,10 +55,10 @@
         
         long[] observed1 = { 500, 623, 72, 70, 31 };
         double[] expected1 = { 485, 541, 82, 61, 37 };
-        assertEquals( "chi-square test statistic", 16.4131070362, 
TestUtils.chiSquare(expected1, observed1), 1E-10);
-        assertEquals("chi-square p-value", 0.002512096, 
TestUtils.chiSquareTest(expected1, observed1), 1E-9);
-        assertTrue("chi-square test reject", 
TestUtils.chiSquareTest(expected1, observed1, 0.003));
-        assertTrue("chi-square test accept", 
!TestUtils.chiSquareTest(expected1, observed1, 0.002));
+        assertEquals( "chi-square test statistic", 9.023307936427388, 
TestUtils.chiSquare(expected1, observed1), 1E-10);
+        assertEquals("chi-square p-value", 0.06051952647453607, 
TestUtils.chiSquareTest(expected1, observed1), 1E-9);
+        assertTrue("chi-square test reject", 
TestUtils.chiSquareTest(expected1, observed1, 0.07));
+        assertTrue("chi-square test accept", 
!TestUtils.chiSquareTest(expected1, observed1, 0.05));
 
         try {
             TestUtils.chiSquareTest(expected1, observed1, 95);
@@ -179,7 +179,7 @@
         double cst = csti.chiSquareTest(exp, obs); 
         assertEquals("chi-square p-value", 0.0, cst, 1E-3);
         assertEquals( "chi-square test statistic", 
-                3624883.342907764, TestUtils.chiSquare(exp, obs), 1E-9);
+                114875.90421929007, TestUtils.chiSquare(exp, obs), 1E-9);
     }
     
     /** Contingency table containing zeros - PR # 32531 */

Modified: commons/proper/math/trunk/xdocs/changes.xml
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/xdocs/changes.xml?rev=610274&r1=610273&r2=610274&view=diff
==============================================================================
--- commons/proper/math/trunk/xdocs/changes.xml (original)
+++ commons/proper/math/trunk/xdocs/changes.xml Tue Jan  8 21:16:00 2008
@@ -116,6 +116,10 @@
         Changed Mean.evaluate() to use a two-pass algorithm, improving accuracy
         by exploiting the the fact that this method has access to the full
         array of data values.
+      </action>
+      <action dev="psteitz" type="fix" issue="MATH-175" due-to="Carl Anderson">
+        Added check and rescaling of expected counts to sum to sum of expected
+        counts if necessary in ChiSquare test.
       </action>     
     </release>
     <release version="1.1" date="2005-12-17"  


Reply via email to