Added: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java?rev=1461862&view=auto
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java (added)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java Wed Mar 27 21:48:10 2013
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math3.ml.clustering;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.math3.exception.NumberIsTooSmallException;
+import org.apache.commons.math3.ml.distance.EuclideanDistance;
+import org.apache.commons.math3.random.JDKRandomGenerator;
+import org.apache.commons.math3.random.RandomGenerator;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link KMeansPlusPlusClusterer}.
+ */
+public class KMeansPlusPlusClustererTest {
+
+    /** Seeded random generator shared by the tests; rebuilt before each test. */
+    private RandomGenerator random;
+
+    /**
+     * Creates the generator with a fixed seed so that every test starts from
+     * the same random sequence.
+     */
+    @Before
+    public void setUp() {
+        random = new JDKRandomGenerator();
+        random.setSeed(1746432956321L);
+    }
+
+    /**
+     * JIRA: MATH-305
+     *
+     * Two points, one cluster, one iteration
+     */
+    @Test
+    public void testPerformClusterAnalysisDegenerate() {
+        KMeansPlusPlusClusterer<DoublePoint> transformer =
+            new KMeansPlusPlusClusterer<DoublePoint>(1, 1);
+
+        DoublePoint[] points = new DoublePoint[] {
+            new DoublePoint(new int[] { 1959, 325100 }),
+            new DoublePoint(new int[] { 1960, 373200 })
+        };
+        List<? extends Cluster<DoublePoint>> clusters = transformer.cluster(Arrays.asList(points));
+        Assert.assertEquals(1, clusters.size());
+        Assert.assertEquals(2, clusters.get(0).getPoints().size());
+
+        // Membership is checked with freshly built instances, i.e. by value.
+        DoublePoint pt1 = new DoublePoint(new int[] { 1959, 325100 });
+        DoublePoint pt2 = new DoublePoint(new int[] { 1960, 373200 });
+        Assert.assertTrue(clusters.get(0).getPoints().contains(pt1));
+        Assert.assertTrue(clusters.get(0).getPoints().contains(pt2));
+    }
+
+    /**
+     * Clusters 27 four-dimensional points into 2 to 26 clusters with each
+     * {@link KMeansPlusPlusClusterer.EmptyClusterStrategy} listed below and
+     * checks that the requested number of clusters is returned and that the
+     * cluster sizes add up to the number of input points.
+     */
+    @Test
+    public void testCertainSpace() {
+        KMeansPlusPlusClusterer.EmptyClusterStrategy[] strategies = {
+            KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_VARIANCE,
+            KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_POINTS_NUMBER,
+            KMeansPlusPlusClusterer.EmptyClusterStrategy.FARTHEST_POINT
+        };
+        for (KMeansPlusPlusClusterer.EmptyClusterStrategy strategy : strategies) {
+            int numberOfVariables = 27;
+            // initialise testvalues
+            int position1 = 1;
+            int position2 = position1 + numberOfVariables;
+            int position3 = position2 + numberOfVariables;
+            int position4 = position3 + numberOfVariables;
+            // testvalues will be multiplied
+            int multiplier = 1000000;
+
+            DoublePoint[] breakingPoints = new DoublePoint[numberOfVariables];
+            // define the space which will break the cluster algorithm
+            for (int i = 0; i < numberOfVariables; i++) {
+                int[] points = { position1, position2, position3, position4 };
+                // multiply the values
+                for (int j = 0; j < points.length; j++) {
+                    points[j] = points[j] * multiplier;
+                }
+                breakingPoints[i] = new DoublePoint(points);
+                position1 = position1 + numberOfVariables;
+                position2 = position2 + numberOfVariables;
+                position3 = position3 + numberOfVariables;
+                position4 = position4 + numberOfVariables;
+            }
+
+            // one run per cluster count n = 2 .. numberOfVariables - 1
+            for (int n = 2; n < numberOfVariables; ++n) {
+                KMeansPlusPlusClusterer<DoublePoint> transformer =
+                    new KMeansPlusPlusClusterer<DoublePoint>(n, 100, new EuclideanDistance(), random, strategy);
+
+                List<? extends Cluster<DoublePoint>> clusters =
+                    transformer.cluster(Arrays.asList(breakingPoints));
+
+                Assert.assertEquals(n, clusters.size());
+                int sum = 0;
+                for (Cluster<DoublePoint> cluster : clusters) {
+                    sum += cluster.getPoints().size();
+                }
+                Assert.assertEquals(numberOfVariables, sum);
+            }
+        }
+    }
+
+    /**
+     * A helper distance measure for testSmallDistances(). It returns the Euclidean
+     * distance scaled by 0.001, so points that are 1 apart are reported as 0.001 apart.
+     */
+    private static class CloseDistance extends EuclideanDistance {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public double compute(double[] a, double[] b) {
+            return super.compute(a, b) * 0.001;
+        }
+    }
+
+    /**
+     * Test points that are very close together. See issue MATH-546.
+     */
+    @Test
+    public void testSmallDistances() {
+        // Create a bunch of points. Most are identical, but one differs by a small
+        // distance (as measured by CloseDistance).
+        DoublePoint repeatedPoint = new DoublePoint(new int[] { 0 });
+        DoublePoint uniquePoint = new DoublePoint(new int[] { 1 });
+
+        Collection<DoublePoint> points = new ArrayList<DoublePoint>();
+        final int numRepeatedPoints = 10 * 1000;
+        for (int i = 0; i < numRepeatedPoints; ++i) {
+            points.add(repeatedPoint);
+        }
+        points.add(uniquePoint);
+
+        // Ask a KMeansPlusPlusClusterer to run zero iterations (i.e., to simply choose initial
+        // cluster centers).
+        final long randomSeed = 0;
+        final int numClusters = 2;
+        final int numIterations = 0;
+        random.setSeed(randomSeed);
+
+        KMeansPlusPlusClusterer<DoublePoint> clusterer =
+            new KMeansPlusPlusClusterer<DoublePoint>(numClusters, numIterations,
+                                                     new CloseDistance(), random);
+        List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);
+
+        // Check that one of the chosen centers is the unique point.
+        boolean uniquePointIsCenter = false;
+        for (CentroidCluster<DoublePoint> cluster : clusters) {
+            if (cluster.getCenter().equals(uniquePoint)) {
+                uniquePointIsCenter = true;
+                break;
+            }
+        }
+        Assert.assertTrue(uniquePointIsCenter);
+    }
+
+    /**
+     * 2 points cannot be clustered into 3 clusters. See issue MATH-436.
+     */
+    @Test(expected=NumberIsTooSmallException.class)
+    public void testPerformClusterAnalysisToManyClusters() {
+        KMeansPlusPlusClusterer<DoublePoint> transformer =
+            new KMeansPlusPlusClusterer<DoublePoint>(3, 1, new EuclideanDistance(), random);
+
+        DoublePoint[] points = new DoublePoint[] {
+            new DoublePoint(new int[] { 1959, 325100 }),
+            new DoublePoint(new int[] { 1960, 373200 })
+        };
+
+        transformer.cluster(Arrays.asList(points));
+    }
+
+}
Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:keywords = Id Revision HeadURL

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/KMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java?rev=1461862&view=auto
==============================================================================
--- commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java (added)
+++ commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java Wed Mar 27 21:48:10 2013
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math3.ml.clustering;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit test for {@link MultiKMeansPlusPlusClusterer}.
+ */
+public class MultiKMeansPlusPlusClustererTest {
+
+    /** Tolerance for comparing centre coordinates. */
+    private static final double EPSILON = 1e-6;
+
+    /**
+     * Clusters 21 two-dimensional points, listed as three groups, into three
+     * clusters and checks the size and the centre of each cluster. The expected
+     * centres are the coordinate-wise means of the three groups.
+     * NOTE(review): no random generator is passed to the clusterer here, so
+     * reproducibility depends on the constructor's default -- confirm.
+     */
+    @Test
+    public void dimension2() {
+        final KMeansPlusPlusClusterer<DoublePoint> singleRun =
+            new KMeansPlusPlusClusterer<DoublePoint>(3, 10);
+        final MultiKMeansPlusPlusClusterer<DoublePoint> clusterer =
+            new MultiKMeansPlusPlusClusterer<DoublePoint>(singleRun, 5);
+
+        final List<DoublePoint> samples = Arrays.asList(
+            // first expected cluster
+            new DoublePoint(new int[] { -15, 3 }),
+            new DoublePoint(new int[] { -15, 4 }),
+            new DoublePoint(new int[] { -15, 5 }),
+            new DoublePoint(new int[] { -14, 3 }),
+            new DoublePoint(new int[] { -14, 5 }),
+            new DoublePoint(new int[] { -13, 3 }),
+            new DoublePoint(new int[] { -13, 4 }),
+            new DoublePoint(new int[] { -13, 5 }),
+
+            // second expected cluster
+            new DoublePoint(new int[] { -1, 0 }),
+            new DoublePoint(new int[] { -1, -1 }),
+            new DoublePoint(new int[] { 0, -1 }),
+            new DoublePoint(new int[] { 1, -1 }),
+            new DoublePoint(new int[] { 1, -2 }),
+
+            // third expected cluster
+            new DoublePoint(new int[] { 13, 3 }),
+            new DoublePoint(new int[] { 13, 4 }),
+            new DoublePoint(new int[] { 14, 4 }),
+            new DoublePoint(new int[] { 14, 7 }),
+            new DoublePoint(new int[] { 16, 5 }),
+            new DoublePoint(new int[] { 16, 6 }),
+            new DoublePoint(new int[] { 17, 4 }),
+            new DoublePoint(new int[] { 17, 7 })
+        );
+
+        final List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(samples);
+        Assert.assertEquals(3, clusters.size());
+
+        boolean firstSeen = false;
+        boolean secondSeen = false;
+        boolean thirdSeen = false;
+        for (CentroidCluster<DoublePoint> cluster : clusters) {
+            final double[] center = cluster.getCenter().getPoint();
+            if (center[0] < 0) {
+                firstSeen = true;
+                checkCluster(cluster, center, 8, -14, 4);
+            } else if (center[1] < 0) {
+                secondSeen = true;
+                checkCluster(cluster, center, 5, 0, -1);
+            } else {
+                thirdSeen = true;
+                checkCluster(cluster, center, 8, 15, 5);
+            }
+        }
+        Assert.assertTrue(firstSeen);
+        Assert.assertTrue(secondSeen);
+        Assert.assertTrue(thirdSeen);
+    }
+
+    /**
+     * Asserts the number of points of a cluster and the coordinates of its centre.
+     *
+     * @param cluster cluster whose point count is checked
+     * @param center coordinates of the cluster centre
+     * @param expectedSize expected number of points in the cluster
+     * @param expectedX expected first coordinate of the centre
+     * @param expectedY expected second coordinate of the centre
+     */
+    private static void checkCluster(CentroidCluster<DoublePoint> cluster, double[] center,
+                                     int expectedSize, double expectedX, double expectedY) {
+        Assert.assertEquals(expectedSize, cluster.getPoints().size());
+        Assert.assertEquals(expectedX, center[0], EPSILON);
+        Assert.assertEquals(expectedY, center[1], EPSILON);
+    }
+
+}

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:keywords = Id Revision HeadURL

Propchange: commons/proper/math/trunk/src/test/java/org/apache/commons/math3/ml/clustering/MultiKMeansPlusPlusClustererTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain