MATH-1270

Various SOFM visualizations: unified distance matrix, hit histogram,
smoothed data histogram, topographic error.


Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/ce131449
Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/ce131449
Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/ce131449

Branch: refs/heads/master
Commit: ce131449a4ca0b06c6bb27ee5d8d6d89b1467bbd
Parents: 999761b
Author: Gilles <er...@apache.org>
Authored: Mon Sep 14 02:08:30 2015 +0200
Committer: Gilles <er...@apache.org>
Committed: Mon Sep 14 02:08:30 2015 +0200

----------------------------------------------------------------------
 .../ml/neuralnet/twod/util/HitHistogram.java    |  85 ++++++++
 .../twod/util/SmoothedDataHistogram.java        |  97 +++++++++
 .../twod/util/TopographicErrorHistogram.java    |  91 +++++++++
 .../twod/util/UnifiedDistanceMatrix.java        | 203 +++++++++++++++++++
 4 files changed, 476 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-math/blob/ce131449/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java
new file mode 100644
index 0000000..536acfc
--- /dev/null
+++ b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math4.ml.neuralnet.twod.util;
+
+import org.apache.commons.math4.ml.neuralnet.MapUtils;
+import org.apache.commons.math4.ml.neuralnet.Neuron;
+import org.apache.commons.math4.ml.neuralnet.Network;
+import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
+import org.apache.commons.math4.ml.distance.DistanceMeasure;
+
+/**
+ * Computes the hit histogram.
+ * Each bin will contain the number of data for which the corresponding
+ * neuron is the best matching unit.
+ */
+public class HitHistogram implements MapDataVisualization {
+    /** Distance used to select the best matching unit of a sample. */
+    private final DistanceMeasure distance;
+    /** Whether to compute relative bin counts. */
+    private final boolean normalizeCount;
+
+    /**
+     * @param normalizeCount Whether to compute relative bin counts.
+     * If {@code true}, the data count in each bin will be divided by the total
+     * number of samples.
+     * @param distance Distance.
+     */
+    public HitHistogram(boolean normalizeCount,
+                        DistanceMeasure distance) {
+        this.normalizeCount = normalizeCount;
+        this.distance = distance;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * The returned image has one pixel per neuron (same number of rows and
+     * columns as the {@code map}).
+     * If {@code data} contains no sample, all the bins are zero, whether or
+     * not normalization was requested.
+     */
+    @Override
+    public double[][] computeImage(NeuronSquareMesh2D map,
+                                   Iterable<double[]> data) {
+        final int nR = map.getNumberOfRows();
+        final int nC = map.getNumberOfColumns();
+
+        final LocationFinder finder = new LocationFinder(map);
+
+        // Total number of samples.
+        int numSamples = 0;
+        // Hit bins.
+        final double[][] hit = new double[nR][nC];
+
+        for (double[] sample : data) {
+            final Neuron best = MapUtils.findBest(sample, map, distance);
+
+            final LocationFinder.Location loc = finder.getLocation(best);
+            final int row = loc.getRow();
+            final int col = loc.getColumn();
+            hit[row][col] += 1;
+
+            ++numSamples;
+        }
+
+        // Without any sample, dividing by "numSamples" would turn every
+        // (zero) bin into NaN: leave the bins untouched in that case.
+        if (normalizeCount && numSamples > 0) {
+            for (int r = 0; r < nR; r++) {
+                for (int c = 0; c < nC; c++) {
+                    hit[r][c] /= numSamples;
+                }
+            }
+        }
+
+        return hit;
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-math/blob/ce131449/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/SmoothedDataHistogram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/SmoothedDataHistogram.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/SmoothedDataHistogram.java
new file mode 100644
index 0000000..bdab570
--- /dev/null
+++ b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/SmoothedDataHistogram.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math4.ml.neuralnet.twod.util;
+
+import org.apache.commons.math4.ml.neuralnet.MapUtils;
+import org.apache.commons.math4.ml.neuralnet.Neuron;
+import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
+import org.apache.commons.math4.ml.distance.DistanceMeasure;
+import org.apache.commons.math4.exception.NumberIsTooSmallException;
+
+/**
+ * Visualization of high-dimensional data projection on a 2D-map.
+ * The method is described in
+ * <blockquote>
+ *  <em>Using Smoothed Data Histograms for Cluster Visualization in Self-Organizing Maps</em>
+ *  <br>
+ *  by Elias Pampalk, Andreas Rauber and Dieter Merkl.
+ * </blockquote>
+ */
+public class SmoothedDataHistogram implements MapDataVisualization {
+    /** Smoothing parameter: number of closest units credited per sample. */
+    private final int smoothingBins;
+    /** Distance used to rank the units with respect to a sample. */
+    private final DistanceMeasure distance;
+    /** Normalization factor: inverse of the sum of all the rank weights. */
+    private final double membershipNormalization;
+
+    /**
+     * @param smoothingBins Number of bins.
+     * @param distance Distance.
+     */
+    public SmoothedDataHistogram(int smoothingBins,
+                                 DistanceMeasure distance) {
+        this.smoothingBins = smoothingBins;
+        this.distance = distance;
+
+        // Rank weights are "smoothingBins", "smoothingBins - 1", ..., 1.
+        double total = 0;
+        for (int weight = smoothingBins; weight > 0; weight--) {
+            total += weight;
+        }
+
+        this.membershipNormalization = 1d / total;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * For each sample, the {@code smoothingBins} first units of the sorted
+     * network are credited with a weight that decreases linearly with
+     * their rank.
+     *
+     * @throws NumberIsTooSmallException if the size of the {@code map}
+     * is smaller than the number of {@link #SmoothedDataHistogram(int,DistanceMeasure)
+     * smoothing bins}.
+     */
+    @Override
+    public double[][] computeImage(NeuronSquareMesh2D map,
+                                   Iterable<double[]> data) {
+        final int rows = map.getNumberOfRows();
+        final int cols = map.getNumberOfColumns();
+
+        final int numUnits = rows * cols;
+        if (numUnits < smoothingBins) {
+            throw new NumberIsTooSmallException(numUnits, smoothingBins, true);
+        }
+
+        final LocationFinder locator = new LocationFinder(map);
+
+        // Histogram bins.
+        final double[][] image = new double[rows][cols];
+
+        for (double[] sample : data) {
+            final Neuron[] ranked = MapUtils.sort(sample, map.getNetwork(), distance);
+
+            for (int rank = 0; rank < smoothingBins; rank++) {
+                final LocationFinder.Location where = locator.getLocation(ranked[rank]);
+                final double membership = (smoothingBins - rank) * membershipNormalization;
+                image[where.getRow()][where.getColumn()] += membership;
+            }
+        }
+
+        return image;
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-math/blob/ce131449/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java
new file mode 100644
index 0000000..0543ce0
--- /dev/null
+++ b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math4.ml.neuralnet.twod.util;
+
+import org.apache.commons.math4.ml.neuralnet.MapUtils;
+import org.apache.commons.math4.ml.neuralnet.Neuron;
+import org.apache.commons.math4.ml.neuralnet.Network;
+import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
+import org.apache.commons.math4.ml.distance.DistanceMeasure;
+import org.apache.commons.math4.util.Pair;
+
+/**
+ * Computes the topographic error histogram.
+ * Each bin will contain the number of data for which the first and
+ * second best matching units are not adjacent in the map.
+ */
+public class TopographicErrorHistogram implements MapDataVisualization {
+    /** Distance. */
+    private final DistanceMeasure distance;
+    /** Whether to compute relative bin counts. */
+    private final boolean relativeCount;
+
+    /**
+     * @param relativeCount Whether to compute relative bin counts.
+     * If {@code true}, the data count in each bin will be divided by the total
+     * number of samples mapped to the neuron represented by that bin.
+     * @param distance Distance.
+     */
+    public TopographicErrorHistogram(boolean relativeCount,
+                                     DistanceMeasure distance) {
+        this.relativeCount = relativeCount;
+        this.distance = distance;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public double[][] computeImage(NeuronSquareMesh2D map,
+                                   Iterable<double[]> data) {
+        final int nR = map.getNumberOfRows();
+        final int nC = map.getNumberOfColumns();
+
+        final Network net = map.getNetwork();
+        final LocationFinder finder = new LocationFinder(map);
+
+        // Hit bins.
+        final int[][] hit = new int[nR][nC];
+        // Error bins.
+        final double[][] error = new double[nR][nC];
+
+        for (double[] sample : data) {
+            final Pair<Neuron, Neuron> p = 
MapUtils.findBestAndSecondBest(sample, map, distance);
+            final Neuron best = p.getFirst();
+
+            final LocationFinder.Location loc = finder.getLocation(best);
+            final int row = loc.getRow();
+            final int col = loc.getColumn();
+            hit[row][col] += 1;
+
+            if (!net.getNeighbours(best).contains(p.getSecond())) {
+                // Increment count if first and second best matching units
+                // are not neighbours.
+                error[row][col] += 1;
+            }
+        }
+
+        if (relativeCount) {
+            for (int r = 0; r < nR; r++) {
+                for (int c = 0; c < nC; c++) {
+                    error[r][c] /= hit[r][c];
+                }
+            }
+        }
+
+        return error;
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-math/blob/ce131449/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java
new file mode 100644
index 0000000..4b6f67a
--- /dev/null
+++ b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math4.ml.neuralnet.twod.util;
+
+import java.util.Collection;
+import org.apache.commons.math4.ml.neuralnet.Neuron;
+import org.apache.commons.math4.ml.neuralnet.Network;
+import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
+import org.apache.commons.math4.ml.distance.DistanceMeasure;
+
+/**
+ * <a href="http://en.wikipedia.org/wiki/U-Matrix";>U-Matrix</a>
+ * visualization of high-dimensional data projection.
+ */
+public class UnifiedDistanceMatrix implements MapVisualization {
+    /** Whether to show distance between each pair of neighbouring units. */
+    private final boolean individualDistances;
+    /** Distance. */
+    private final DistanceMeasure distance;
+
+    /** Simple constructor.
+     * @param individualDistances If {@code true}, the 8 individual
+     * inter-units distances will be {@link #computeImage(NeuronSquareMesh2D)
+     * computed}.  They will be stored in additional pixels around each of
+     * the original units of the 2D-map.  The value zero will be stored in the
+     * pixel corresponding to the location of a unit of the 2D-map.
+     * If {@code false}, only the average distance between a unit and all its
+     * neighbours will be computed (and stored in the pixel corresponding to
+     * that unit of the 2D-map).  In that case, the number of neighbours taken
+     * into account depends on the network's
+     * {@link org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood
+     * neighbourhood type}.
+     * @param distance Distance.
+     */
+    public UnifiedDistanceMatrix(boolean individualDistances,
+                                 DistanceMeasure distance) {
+        this.individualDistances = individualDistances;
+        this.distance = distance;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public double[][] computeImage(NeuronSquareMesh2D map) {
+        if (individualDistances) {
+            return individualDistances(map);
+        } else {
+            return averageDistances(map);
+        }
+    }
+
+    /**
+     * Computes the distances between a unit of the map and its
+     * neighbours.
+     * The image will contain more pixels than the number of neurons
+     * in the given {@code map} because each neuron has 8 neighbours.
+     * The value zero will be stored in the pixels corresponding to
+     * the location of a map unit.
+     *
+     * @param map Map.
+     * @return an image representing the individual distances.
+     */
+    private double[][] individualDistances(NeuronSquareMesh2D map) {
+        final int numRows = map.getNumberOfRows();
+        final int numCols = map.getNumberOfColumns();
+
+        final double[][] uMatrix = new double[numRows * 2 + 1][numCols * 2 + 
1];
+
+        for (int i = 0; i < numRows; i++) {
+            // Current unit's row index in result image.
+            final int iR = 2 * i + 1;
+
+            for (int j = 0; j < numCols; j++) {
+                // Current unit's column index in result image.
+                final int jR = 2 * j + 1;
+
+                final double[] current = map.getNeuron(i, j).getFeatures();
+                Neuron neighbour;
+
+                // Top-left neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          
NeuronSquareMesh2D.HorizontalDirection.LEFT,
+                                          
NeuronSquareMesh2D.VerticalDirection.UP);
+                if (neighbour != null) {
+                    uMatrix[iR - 1][jR - 1] = distance.compute(current,
+                                                               
neighbour.getFeatures());
+                }
+
+                // Top-center neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          
NeuronSquareMesh2D.HorizontalDirection.CENTER,
+                                          
NeuronSquareMesh2D.VerticalDirection.UP);
+                if (neighbour != null) {
+                    uMatrix[iR - 1][jR] = distance.compute(current,
+                                                           
neighbour.getFeatures());
+                }
+
+                // Top-right neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          
NeuronSquareMesh2D.HorizontalDirection.RIGHT,
+                                          
NeuronSquareMesh2D.VerticalDirection.UP);
+                if (neighbour != null) {
+                    uMatrix[iR - 1][jR + 1] = distance.compute(current,
+                                                               
neighbour.getFeatures());
+                }
+
+                // Left neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          
NeuronSquareMesh2D.HorizontalDirection.LEFT,
+                                          
NeuronSquareMesh2D.VerticalDirection.CENTER);
+                if (neighbour != null) {
+                    uMatrix[iR][jR - 1] = distance.compute(current,
+                                                           
neighbour.getFeatures());
+                }
+
+                // Right neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          
NeuronSquareMesh2D.HorizontalDirection.RIGHT,
+                                          
NeuronSquareMesh2D.VerticalDirection.CENTER);
+                if (neighbour != null) {
+                    uMatrix[iR][jR + 1] = distance.compute(current,
+                                                           
neighbour.getFeatures());
+                }
+
+                // Bottom-left neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          
NeuronSquareMesh2D.HorizontalDirection.LEFT,
+                                          
NeuronSquareMesh2D.VerticalDirection.DOWN);
+                if (neighbour != null) {
+                    uMatrix[iR + 1][jR - 1] = distance.compute(current,
+                                                               
neighbour.getFeatures());
+                }
+
+                // Bottom-center neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          
NeuronSquareMesh2D.HorizontalDirection.CENTER,
+                                          
NeuronSquareMesh2D.VerticalDirection.DOWN);
+                if (neighbour != null) {
+                    uMatrix[iR + 1][jR] = distance.compute(current,
+                                                           
neighbour.getFeatures());
+                }
+
+                // Bottom-right neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          
NeuronSquareMesh2D.HorizontalDirection.RIGHT,
+                                          
NeuronSquareMesh2D.VerticalDirection.DOWN);
+                if (neighbour != null) {
+                    uMatrix[iR + 1][jR + 1] = distance.compute(current,
+                                                               
neighbour.getFeatures());
+                }
+            }
+        }
+
+        return uMatrix;
+    }
+
+    /**
+     * Computes the distances between a unit of the map and its neighbours.
+     *
+     * @param map Map.
+     * @return an image representing the average distances.
+     */
+    private double[][] averageDistances(NeuronSquareMesh2D map) {
+        final int numRows = map.getNumberOfRows();
+        final int numCols = map.getNumberOfColumns();
+        final double[][] uMatrix = new double[numRows][numCols];
+
+        final Network net = map.getNetwork();
+
+        for (int i = 0; i < numRows; i++) {
+            for (int j = 0; j < numCols; j++) {
+                final Neuron neuron = map.getNeuron(i, j);
+                final Collection<Neuron> neighbours = 
net.getNeighbours(neuron);
+                final double[] features = neuron.getFeatures();
+
+                double d = 0;
+                int count = 0;
+                for (Neuron n : neighbours) {
+                    ++count;
+                    d += distance.compute(features, n.getFeatures());
+                }
+
+                uMatrix[i][j] = d / count;
+            }
+        }
+
+        return uMatrix;
+    }
+}

Reply via email to