Repository: spark
Updated Branches:
  refs/heads/master bc95fe08d -> fbad72288


[SPARK-3077][MLLIB] fix some chisq-test

- promote nullHypothesis field in ChiSqTestResult to TestResult. Every test
should have a null hypothesis.
- correct null hypothesis statement for independence test
- p-value: fix the upper bound of the "low presumption" range, 0.01 -> 0.1

Author: Xiangrui Meng <[email protected]>

Closes #1982 from mengxr/fix-chisq and squashes the following commits:

5f0de02 [Xiangrui Meng] make ChiSqTestResult constructor package private
bc74ea1 [Xiangrui Meng] update chisq-test


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fbad7228
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fbad7228
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fbad7228

Branch: refs/heads/master
Commit: fbad72288d8b6e641b00417a544cae6e8bfef2d7
Parents: bc95fe0
Author: Xiangrui Meng <[email protected]>
Authored: Sat Aug 16 21:16:27 2014 -0700
Committer: Xiangrui Meng <[email protected]>
Committed: Sat Aug 16 21:16:27 2014 -0700

----------------------------------------------------------------------
 .../spark/mllib/stat/test/ChiSqTest.scala       |  2 +-
 .../spark/mllib/stat/test/TestResult.scala      | 28 +++++++++++---------
 2 files changed, 17 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fbad7228/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
index 8f67527..215de95 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
@@ -56,7 +56,7 @@ private[stat] object ChiSqTest extends Logging {
   object NullHypothesis extends Enumeration {
     type NullHypothesis = Value
     val goodnessOfFit = Value("observed follows the same distribution as expected.")
-    val independence = Value("observations in each column are statistically independent.")
+    val independence = Value("the occurrence of the outcomes is statistically independent.")
   }
 
   // Method identification based on input methodName string

http://git-wip-us.apache.org/repos/asf/spark/blob/fbad7228/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
index 2f27862..4784f9e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
@@ -45,6 +45,11 @@ trait TestResult[DF] {
   def statistic: Double
 
   /**
+   * Null hypothesis of the test.
+   */
+  def nullHypothesis: String
+
+  /**
    * String explaining the hypothesis test result.
    * Specific classes implementing this trait should override this method to output test-specific
    * information.
@@ -53,13 +58,13 @@ trait TestResult[DF] {
 
     // String explaining what the p-value indicates.
     val pValueExplain = if (pValue <= 0.01) {
-      "Very strong presumption against null hypothesis."
+      s"Very strong presumption against null hypothesis: $nullHypothesis."
     } else if (0.01 < pValue && pValue <= 0.05) {
-      "Strong presumption against null hypothesis."
-    } else if (0.05 < pValue && pValue <= 0.01) {
-      "Low presumption against null hypothesis."
+      s"Strong presumption against null hypothesis: $nullHypothesis."
+    } else if (0.05 < pValue && pValue <= 0.1) {
+      s"Low presumption against null hypothesis: $nullHypothesis."
     } else {
-      "No presumption against null hypothesis."
+      s"No presumption against null hypothesis: $nullHypothesis."
     }
 
     s"degrees of freedom = ${degreesOfFreedom.toString} \n" +
@@ -70,19 +75,18 @@ trait TestResult[DF] {
 
 /**
  * :: Experimental ::
- * Object containing the test results for the chi squared hypothesis test.
+ * Object containing the test results for the chi-squared hypothesis test.
  */
 @Experimental
-class ChiSqTestResult(override val pValue: Double,
+class ChiSqTestResult private[stat] (override val pValue: Double,
     override val degreesOfFreedom: Int,
     override val statistic: Double,
     val method: String,
-    val nullHypothesis: String) extends TestResult[Int] {
+    override val nullHypothesis: String) extends TestResult[Int] {
 
   override def toString: String = {
-    "Chi squared test summary: \n" +
-    s"method: $method \n" +
-    s"null hypothesis: $nullHypothesis \n" +
-    super.toString
+    "Chi squared test summary:\n" +
+      s"method: $method\n" +
+      super.toString
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to