Sampling from a "List".

(n, k) combinations and shuffling code previously in "CollectionSampler" class.


Project: http://git-wip-us.apache.org/repos/asf/commons-rng/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-rng/commit/f7925ec8
Tree: http://git-wip-us.apache.org/repos/asf/commons-rng/tree/f7925ec8
Diff: http://git-wip-us.apache.org/repos/asf/commons-rng/diff/f7925ec8

Branch: refs/heads/master
Commit: f7925ec82d84bf3efb9b8a10d281a031bf098ea3
Parents: 6b8bbc0
Author: Gilles <er...@apache.org>
Authored: Mon Nov 21 15:00:15 2016 +0100
Committer: Gilles <er...@apache.org>
Committed: Mon Nov 21 15:00:15 2016 +0100

----------------------------------------------------------------------
 .../commons/rng/sampling/ListSampler.java       | 112 ++++++++++++
 .../commons/rng/sampling/ListSamplerTest.java   | 171 +++++++++++++++++++
 2 files changed, 283 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-rng/blob/f7925ec8/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ListSampler.java
----------------------------------------------------------------------
diff --git 
a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ListSampler.java
 
b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ListSampler.java
new file mode 100644
index 0000000..9947dce
--- /dev/null
+++ 
b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ListSampler.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.rng.sampling;
+
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.commons.rng.UniformRandomProvider;
+
+/**
+ * Sampling from a {@link List}.
+ *
+ * This class also contains utilities for shuffling a {@link List} in-place.
+ *
+ * @since 1.0
+ */
+public class ListSampler {
+    /**
+     * Not instantiable: class contains only static methods.
+     */
+    private ListSampler() {}
+
+    /**
+     * Generates a list of size {@code k} whose entries are selected
+     * randomly, without repetition, from the items in the given
+     * {@code collection}.
+     *
+     * <p>
+     * Sampling is without replacement; but if the source collection
+     * contains identical objects, the sample may include repeats.
+     * </p>
+     *
+     * @param <T> Type of the list items.
+     * @param rng Generator of uniformly distributed random numbers.
+     * @param collection List to be sampled from.
+     * @param k Size of the returned sample.
+     * @return a shuffled sample from the source collection.
+     * @throws IllegalArgumentException if {@code k <= 0} or {@code k > collection.size()}.
+     */
+    public static <T> List<T> sample(UniformRandomProvider rng,
+                                     List<T> collection,
+                                     int k) {
+        final int n = collection.size();
+        final PermutationSampler p = new PermutationSampler(rng, n, k);
+        final List<T> result = new ArrayList<T>(k);
+        final int[] index = p.sample(); // Positions within "collection".
+
+        for (int i = 0; i < k; i++) {
+            result.add(collection.get(index[i]));
+        }
+
+        return result;
+    }
+
+    /**
+     * Shuffles the entries of the given list; equivalent to
+     * {@code shuffle(list, 0, false, rng)}.
+     *
+     * @param <T> Type of the list items.
+     * @param list List whose entries will be shuffled (in-place).
+     * @param rng Random number generator.
+     * @see #shuffle(List,int,boolean,UniformRandomProvider)
+     */
+    public static <T> void shuffle(List<T> list,
+                                   UniformRandomProvider rng) {
+        shuffle(list, 0, false, rng);
+    }
+
+    /**
+     * Shuffles the entries of the given list, using the
+     * <a href="http://en.wikipedia.org/wiki/Fisher–Yates_shuffle#The_modern_algorithm">
+     * Fisher-Yates</a> algorithm.
+     * The {@code start} and {@code towardHead} parameters select which part
+     * of the list is randomized and which is left untouched.
+     * The index permutation is obtained from {@code PermutationSampler.shuffle}.
+     *
+     * @param <T> Type of the list items.
+     * @param list List whose entries will be shuffled (in-place).
+     * @param start Index at which shuffling begins.
+     * @param towardHead Shuffling is performed for index positions between
+     * {@code start} and either the end (if {@code false}) or the beginning
+     * (if {@code true}) of the list.
+     * @param rng Random number generator.
+     */
+    public static <T> void shuffle(List<T> list,
+                                   int start,
+                                   boolean towardHead,
+                                   UniformRandomProvider rng) {
+        final int len = list.size();
+        final int[] indices = PermutationSampler.natural(len);
+        PermutationSampler.shuffle(indices, start, towardHead, rng);
+
+        final ArrayList<T> items = new ArrayList<T>(list); // Snapshot: "list" is overwritten below.
+        for (int i = 0; i < len; i++) {
+            list.set(i, items.get(indices[i]));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-rng/blob/f7925ec8/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ListSamplerTest.java
----------------------------------------------------------------------
diff --git 
a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ListSamplerTest.java
 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ListSamplerTest.java
new file mode 100644
index 0000000..652f8bb
--- /dev/null
+++ 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ListSamplerTest.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rng.sampling;
+
+import java.util.Set;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Arrays;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.commons.math3.stat.inference.ChiSquareTest;
+
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.simple.RandomSource;
+
+/**
+ * Tests for {@link ListSampler}.
+ */
+public class ListSamplerTest {
+    private final UniformRandomProvider rng = 
RandomSource.create(RandomSource.ISAAC, 6543432321L);
+    private final ChiSquareTest chiSquareTest = new ChiSquareTest();
+
+    @Test
+    public void testSample() {
+        final String[][] c = { { "0", "1" }, { "0", "2" }, { "0", "3" }, { 
"0", "4" },
+                               { "1", "2" }, { "1", "3" }, { "1", "4" },
+                               { "2", "3" }, { "2", "4" },
+                               { "3", "4" } };
+        final long[] observed = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; // One counter per 2-set in "c".
+        final double[] expected = { 100, 100, 100, 100, 100, 100, 100, 100, 
100, 100 };
+
+        final HashSet<String> cPop = new HashSet<String>(); // {0, 1, 2, 3, 4}.
+        for (int i = 0; i < 5; i++) {
+            cPop.add(Integer.toString(i));
+        }
+
+        final List<Set<String>> sets = new ArrayList<Set<String>>(); // 2-sets 
from 5.
+        for (int i = 0; i < 10; i++) {
+            final HashSet<String> hs = new HashSet<String>();
+            hs.add(c[i][0]);
+            hs.add(c[i][1]);
+            sets.add(hs);
+        }
+
+        for (int i = 0; i < 1000; i++) { // 1000 draws, i.e. 100 expected per 2-set.
+            observed[findSample(sets, ListSampler.sample(rng, new 
ArrayList<String>(cPop), 2))]++;
+        }
+
+        // Pass if we cannot reject null hypothesis that distributions are the 
same.
+        Assert.assertFalse(chiSquareTest.chiSquareTest(expected, observed, 
0.001));
+    }
+
+    @Test
+    public void testSampleWhole() {
+        // A sample whose size equals the collection size must contain all its items.
+        final List<String> list = new ArrayList<String>();
+        list.add("one");
+
+        final List<String> one = ListSampler.sample(rng, list, 1);
+        Assert.assertEquals(1, one.size());
+        Assert.assertTrue(one.contains("one"));
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void testSamplePrecondition1() {
+        // Must fail for sample size > collection size.
+        final List<String> list = new ArrayList<String>();
+        list.add("one");
+        ListSampler.sample(rng, list, 2);
+    }
+
+    @Test(expected=IllegalArgumentException.class)
+    public void testSamplePrecondition2() {
+        // Must fail when sampling from an empty collection (k = 1 > size = 0).
+        final List<String> list = new ArrayList<String>();
+        ListSampler.sample(rng, list, 1);
+    }
+
+    @Test
+    public void testShuffleTail() {
+        final List<Integer> orig = new ArrayList<Integer>();
+        for (int i = 0; i < 10; i++) {
+            orig.add((i + 1) * rng.nextInt());
+        }
+        final List<Integer> list = new ArrayList<Integer>(orig);
+
+        final int start = 4;
+        ListSampler.shuffle(list, start, false, rng);
+
+        // Ensure that all entries below index "start" did not move.
+        for (int i = 0; i < start; i++) {
+            Assert.assertEquals(orig.get(i), list.get(i));
+        }
+
+        // Ensure that at least one entry has moved.
+        boolean ok = false;
+        for (int i = start; i < orig.size() - 1; i++) { // NOTE(review): last index is never compared.
+            if (!orig.get(i).equals(list.get(i))) {
+                ok = true;
+                break;
+            }
+        }
+        Assert.assertTrue(ok);
+    }
+
+    @Test
+    public void testShuffleHead() {
+        final List<Integer> orig = new ArrayList<Integer>();
+        for (int i = 0; i < 10; i++) {
+            orig.add((i + 1) * rng.nextInt());
+        }
+        final List<Integer> list = new ArrayList<Integer>(orig);
+
+        final int start = 4;
+        ListSampler.shuffle(list, start, true, rng);
+
+        // Ensure that all entries above index "start" did not move.
+        for (int i = start + 1; i < orig.size(); i++) {
+            Assert.assertEquals(orig.get(i), list.get(i));
+        }
+
+        // Ensure that at least one entry has moved.
+        boolean ok = false;
+        for (int i = 0; i <= start; i++) {
+            if (!orig.get(i).equals(list.get(i))) {
+                ok = true;
+                break;
+            }
+        }
+        Assert.assertTrue(ok);
+    }
+
+    //// Support methods.
+    // Returns the index in "u" of the set equal to "sampList"; fails the test if none.
+    private <T extends Set<String>> int findSample(List<T> u,
+                                                   Collection<String> 
sampList) {
+        final String[] samp = sampList.toArray(new String[sampList.size()]);
+        for (int i = 0; i < u.size(); i++) {
+            final T set = u.get(i);
+            final HashSet<String> sampSet = new HashSet<String>(); // Order-insensitive view of "samp".
+            for (int j = 0; j < samp.length; j++) {
+                sampSet.add(samp[j]);
+            }
+            if (set.equals(sampSet)) {
+                return i;
+            }
+        }
+
+        Assert.fail("Sample not found: { " +
+                    samp[0] + ", " + samp[1] + " }");
+        return -1; // Only needed for compilation, presumably: Assert.fail() is expected to throw.
+    }
+}

Reply via email to