Author: erans
Date: Tue Mar 15 12:15:02 2011
New Revision: 1081744

URL: http://svn.apache.org/viewvc?rev=1081744&view=rev
Log:
MATH-546
Wrong variable type ("int" instead of "double").
Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java commons/proper/math/trunk/src/site/xdoc/changes.xml commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java?rev=1081744&r1=1081743&r2=1081744&view=diff ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java (original) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java Tue Mar 15 12:15:02 2011 @@ -172,7 +172,7 @@ public class KMeansPlusPlusClusterer<T e while (resultSet.size() < k) { // For each data point x, compute D(x), the distance between x and // the nearest center that has already been chosen. - int sum = 0; + double sum = 0; for (int i = 0; i < pointSet.size(); i++) { final T p = pointSet.get(i); final Cluster<T> nearest = getNearestCluster(resultSet, p); Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=1081744&r1=1081743&r2=1081744&view=diff ============================================================================== --- commons/proper/math/trunk/src/site/xdoc/changes.xml (original) +++ commons/proper/math/trunk/src/site/xdoc/changes.xml Tue Mar 15 12:15:02 2011 @@ -52,6 +52,9 @@ The <action> type attribute can be add,u If the output is not quite correct, check for invisible trailing spaces! 
--> <release version="3.0" date="TBD" description="TBD"> + <action dev="erans" type="fix" issue="MATH-546" due-to="Nate Paymer"> + Fixed bug in "KMeansPlusPlusClusterer". + </action> <action dev="erans" type="update" issue="MATH-542"> All exceptions defined in Commons Math provide a context and a compound message list. Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java?rev=1081744&r1=1081743&r2=1081744&view=diff ============================================================================== --- commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java (original) +++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java Tue Mar 15 12:15:02 2011 @@ -20,7 +20,9 @@ package org.apache.commons.math.stat.clu import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Random; @@ -166,4 +168,84 @@ public class KMeansPlusPlusClustererTest } + /** + * A helper class for testSmallDistances(). This class is similar to EuclideanIntegerPoint, but + * it defines a different distanceFrom() method that tends to return distances less than 1. 
+ */ + private class CloseIntegerPoint implements Clusterable<CloseIntegerPoint> { + public CloseIntegerPoint(EuclideanIntegerPoint point) { + euclideanPoint = point; + } + + public double distanceFrom(CloseIntegerPoint p) { + return euclideanPoint.distanceFrom(p.euclideanPoint) * 0.001; + } + + public CloseIntegerPoint centroidOf(Collection<CloseIntegerPoint> p) { + Collection<EuclideanIntegerPoint> euclideanPoints = + new ArrayList<EuclideanIntegerPoint>(); + for (CloseIntegerPoint point : p) { + euclideanPoints.add(point.euclideanPoint); + } + return new CloseIntegerPoint(euclideanPoint.centroidOf(euclideanPoints)); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof CloseIntegerPoint)) { + return false; + } + CloseIntegerPoint p = (CloseIntegerPoint) o; + + return euclideanPoint.equals(p.euclideanPoint); + } + + @Override + public int hashCode() { + return euclideanPoint.hashCode(); + } + + private EuclideanIntegerPoint euclideanPoint; + } + + /** + * Test points that are very close together. See issue MATH-546. + */ + @Test + public void testSmallDistances() { + // Create a bunch of CloseIntegerPoints. Most are identical, but one is different by a + // small distance. + int[] repeatedArray = { 0 }; + int[] uniqueArray = { 1 }; + CloseIntegerPoint repeatedPoint = + new CloseIntegerPoint(new EuclideanIntegerPoint(repeatedArray)); + CloseIntegerPoint uniquePoint = + new CloseIntegerPoint(new EuclideanIntegerPoint(uniqueArray)); + + Collection<CloseIntegerPoint> points = new ArrayList<CloseIntegerPoint>(); + final int NUM_REPEATED_POINTS = 10 * 1000; + for (int i = 0; i < NUM_REPEATED_POINTS; ++i) { + points.add(repeatedPoint); + } + points.add(uniquePoint); + + // Ask a KMeansPlusPlusClusterer to run zero iterations (i.e., to simply choose initial + // cluster centers). 
+ final long RANDOM_SEED = 0; + final int NUM_CLUSTERS = 2; + final int NUM_ITERATIONS = 0; + KMeansPlusPlusClusterer<CloseIntegerPoint> clusterer = + new KMeansPlusPlusClusterer<CloseIntegerPoint>(new Random(RANDOM_SEED)); + List<Cluster<CloseIntegerPoint>> clusters = + clusterer.cluster(points, NUM_CLUSTERS, NUM_ITERATIONS); + + // Check that one of the chosen centers is the unique point. + boolean uniquePointIsCenter = false; + for (Cluster<CloseIntegerPoint> cluster : clusters) { + if (cluster.getCenter().equals(uniquePoint)) { + uniquePointIsCenter = true; + } + } + assertTrue(uniquePointIsCenter); + } }