Author: luc
Date: Wed Mar 23 13:19:23 2011
New Revision: 1084577

URL: http://svn.apache.org/viewvc?rev=1084577&view=rev
Log:
Added a consistency check for number of points with respect to the number of 
clusters in Kmeans++ clustering

Modified:
    
commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java
    commons/proper/math/trunk/src/site/xdoc/changes.xml
    
commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java

Modified: 
commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java?rev=1084577&r1=1084576&r2=1084577&view=diff
==============================================================================
--- 
commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java
 (original)
+++ 
commons/proper/math/trunk/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java
 Wed Mar 23 13:19:23 2011
@@ -23,8 +23,12 @@ import java.util.List;
 import java.util.Random;
 
 import org.apache.commons.math.exception.ConvergenceException;
+import org.apache.commons.math.exception.MathIllegalArgumentException;
+import org.apache.commons.math.exception.NullArgumentException;
+import org.apache.commons.math.exception.NumberIsTooSmallException;
 import org.apache.commons.math.exception.util.LocalizedFormats;
 import org.apache.commons.math.stat.descriptive.moment.Variance;
+import org.apache.commons.math.util.MathUtils;
 
 /**
  * Clustering algorithm based on David Arthur and Sergei Vassilvitski 
k-means++ algorithm.
@@ -88,9 +92,21 @@ public class KMeansPlusPlusClusterer<T e
      * @param maxIterations the maximum number of iterations to run the 
algorithm
      *     for.  If negative, no maximum will be used
      * @return a list of clusters containing the points
+     * @throws MathIllegalArgumentException if the data points are null or the 
number
+     *     of clusters is larger than the number of data points
      */
-    public List<Cluster<T>> cluster(final Collection<T> points,
-                                    final int k, final int maxIterations) {
+    public List<Cluster<T>> cluster(final Collection<T> points, final int k,
+                                    final int maxIterations)
+        throws MathIllegalArgumentException {
+
+        // sanity checks
+        MathUtils.checkNotNull(points);
+        
+        // number of clusters has to be smaller than or equal to the number of data points
+        if (points.size() < k) {
+            throw new NumberIsTooSmallException(points.size(), k, false);
+        }
+        
         // create the initial clusters
         List<Cluster<T>> clusters = chooseInitialCenters(points, k, random);
         assignPointsToClusters(clusters, points);

Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=1084577&r1=1084576&r2=1084577&view=diff
==============================================================================
--- commons/proper/math/trunk/src/site/xdoc/changes.xml (original)
+++ commons/proper/math/trunk/src/site/xdoc/changes.xml Wed Mar 23 13:19:23 2011
@@ -52,6 +52,10 @@ The <action> type attribute can be add,u
     If the output is not quite correct, check for invisible trailing spaces!
      -->
     <release version="3.0" date="TBD" description="TBD">
+      <action dev="luc" type="add" issue="MATH-436" due-to="Thomas Neidhart">
+        Added a consistency check for number of points with respect to the 
number
+        of clusters in Kmeans++ clustering
+      </action>
       <action dev="mikl" type="add" issue="MATH-437">
         Added two sided Kolmogorov-Smirnov distribution using modified 
         Marsaglia et al. (2003) implementation and quick decisions for certain 

Modified: 
commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java?rev=1084577&r1=1084576&r2=1084577&view=diff
==============================================================================
--- 
commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java
 (original)
+++ 
commons/proper/math/trunk/src/test/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClustererTest.java
 Wed Mar 23 13:19:23 2011
@@ -24,6 +24,7 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Random;
 
+import org.apache.commons.math.exception.NumberIsTooSmallException;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -246,4 +247,26 @@ public class KMeansPlusPlusClustererTest
         }
         Assert.assertTrue(uniquePointIsCenter);
     }
+    
+    /**
+     * 2 points cannot be clustered into 3 clusters. See issue MATH-436.
+     */
+    @Test(expected=NumberIsTooSmallException.class)
+    public void testPerformClusterAnalysisToManyClusters() {
+        KMeansPlusPlusClusterer<EuclideanIntegerPoint> transformer = 
+            new KMeansPlusPlusClusterer<EuclideanIntegerPoint>(
+                    new Random(1746432956321l));
+        
+        EuclideanIntegerPoint[] points = new EuclideanIntegerPoint[] {
+            new EuclideanIntegerPoint(new int[] {
+                1959, 325100
+            }), new EuclideanIntegerPoint(new int[] {
+                1960, 373200
+            })
+        };
+        
+        transformer.cluster(Arrays.asList(points), 3, 1);
+
+    }
+
 }


Reply via email to