Author: luc Date: Sat Nov 20 21:31:21 2010 New Revision: 1037332 URL: http://svn.apache.org/viewvc?rev=1037332&view=rev Log: Added a normalization feature to transform samples so they have zero mean and unit standard deviation Jira: MATH-426
Modified: commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/StatUtils.java commons/proper/math/branches/MATH_2_X/src/site/xdoc/changes.xml commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/StatUtilsTest.java Modified: commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/StatUtils.java URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/StatUtils.java?rev=1037332&r1=1037331&r2=1037332&view=diff ============================================================================== --- commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/StatUtils.java (original) +++ commons/proper/math/branches/MATH_2_X/src/main/java/org/apache/commons/math/stat/StatUtils.java Sat Nov 20 21:31:21 2010 @@ -18,6 +18,7 @@ package org.apache.commons.math.stat; import org.apache.commons.math.MathRuntimeException; import org.apache.commons.math.exception.util.LocalizedFormats; +import org.apache.commons.math.stat.descriptive.DescriptiveStatistics; import org.apache.commons.math.stat.descriptive.UnivariateStatistic; import org.apache.commons.math.stat.descriptive.moment.GeometricMean; import org.apache.commons.math.stat.descriptive.moment.Mean; @@ -628,5 +629,34 @@ public final class StatUtils { } return (sum1 - (sum2 * sum2 / n)) / (n - 1); } + + + /** + * Normalize (standardize) the series, so that the result has a mean of 0 and a standard deviation of 1. 
+ * + * @param sample sample to normalize + * @return normalized (standardized) sample + */ + public static double[] normalize(final double[] sample) { + DescriptiveStatistics stats = new DescriptiveStatistics(); + + // Add the data from the series to stats + for (int i = 0; i < sample.length; i++) { + stats.addValue(sample[i]); + } + + // Compute mean and standard deviation + double mean = stats.getMean(); + double standardDeviation = stats.getStandardDeviation(); + + // initialize the standardizedSample, which has the same length as the sample + double[] standardizedSample = new double[sample.length]; + + for (int i = 0; i < sample.length; i++) { + // z = (x- mean)/standardDeviation + standardizedSample[i] = (sample[i] - mean) / standardDeviation; + } + return standardizedSample; + } } Modified: commons/proper/math/branches/MATH_2_X/src/site/xdoc/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/site/xdoc/changes.xml?rev=1037332&r1=1037331&r2=1037332&view=diff ============================================================================== --- commons/proper/math/branches/MATH_2_X/src/site/xdoc/changes.xml (original) +++ commons/proper/math/branches/MATH_2_X/src/site/xdoc/changes.xml Sat Nov 20 21:31:21 2010 @@ -52,6 +52,9 @@ The <action> type attribute can be add,u If the output is not quite correct, check for invisible trailing spaces! --> <release version="2.2" date="TBD" description="TBD"> + <action dev="luc" type="fix" issue="MATH-426" due-to="Erik van Ingen"> + Added a normalization feature to transform samples so they have zero mean and unit standard deviation + </action> <action dev="erans" type="add" issue="MATH-440"> Created "MathUserException" class to convey cause of failure between layers of user code separated by a layer of Commons-Math code. 
Deprecated Modified: commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/StatUtilsTest.java URL: http://svn.apache.org/viewvc/commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/StatUtilsTest.java?rev=1037332&r1=1037331&r2=1037332&view=diff ============================================================================== --- commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/StatUtilsTest.java (original) +++ commons/proper/math/branches/MATH_2_X/src/test/java/org/apache/commons/math/stat/StatUtilsTest.java Sat Nov 20 21:31:21 2010 @@ -19,6 +19,7 @@ package org.apache.commons.math.stat; import junit.framework.TestCase; import org.apache.commons.math.TestUtils; +import org.apache.commons.math.stat.descriptive.DescriptiveStatistics; import org.apache.commons.math.util.FastMath; /** @@ -420,4 +421,48 @@ public final class StatUtilsTest extends assertEquals(FastMath.exp(0.5 * StatUtils.sumLog(test, 0, 2)), StatUtils.geometricMean(test, 0, 2), Double.MIN_VALUE); } + + + /** + * Run the test with the values 50 and 100 and assume standardized values + */ + + public void testNormalize1() { + double sample[] = { 50, 100 }; + double expectedSample[] = { -25 / Math.sqrt(1250), 25 / Math.sqrt(1250) }; + double[] out = StatUtils.normalize(sample); + for (int i = 0; i < out.length; i++) { + assertEquals(out[i], expectedSample[i]); + } + + } + + /** + * Run with 77 random values, assuming that the outcome has a mean of 0 and a standard deviation of 1 with a + * precision of 1E-10. 
+ */ + + public void testNormalize2() { + // create a sample with 77 values + int length = 77; + double sample[] = new double[length]; + for (int i = 0; i < length; i++) { + sample[i] = Math.random(); + } + // normalize this sample + double standardizedSample[] = StatUtils.normalize(sample); + + DescriptiveStatistics stats = new DescriptiveStatistics(); + // Add the data from the array + for (int i = 0; i < length; i++) { + stats.addValue(standardizedSample[i]); + } + // the calculations do have a limited precision + double distance = 1E-10; + // check the mean and standard deviation + assertEquals(0.0, stats.getMean(), distance); + assertEquals(1.0, stats.getStandardDeviation(), distance); + + } + }