Sampling from a "List". (n, k) combinations and shuffling code previously in "CollectionSampler" class.
Project: http://git-wip-us.apache.org/repos/asf/commons-rng/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-rng/commit/f7925ec8 Tree: http://git-wip-us.apache.org/repos/asf/commons-rng/tree/f7925ec8 Diff: http://git-wip-us.apache.org/repos/asf/commons-rng/diff/f7925ec8 Branch: refs/heads/master Commit: f7925ec82d84bf3efb9b8a10d281a031bf098ea3 Parents: 6b8bbc0 Author: Gilles <er...@apache.org> Authored: Mon Nov 21 15:00:15 2016 +0100 Committer: Gilles <er...@apache.org> Committed: Mon Nov 21 15:00:15 2016 +0100 ---------------------------------------------------------------------- .../commons/rng/sampling/ListSampler.java | 112 ++++++++++++ .../commons/rng/sampling/ListSamplerTest.java | 171 +++++++++++++++++++ 2 files changed, 283 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-rng/blob/f7925ec8/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ListSampler.java ---------------------------------------------------------------------- diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ListSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ListSampler.java new file mode 100644 index 0000000..9947dce --- /dev/null +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ListSampler.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.rng.sampling; + +import java.util.List; +import java.util.ArrayList; + +import org.apache.commons.rng.UniformRandomProvider; + +/** + * Sampling from a {@link List}. + * + * This class also contains utilities for shuffling a {@link List} in-place. + * + * @since 1.0 + */ +public class ListSampler { + /** + * Class contains only static methods. + */ + private ListSampler() {} + + /** + * Generates a list of size {@code k} whose entries are selected + * randomly, without repetition, from the items in the given + * {@code collection}. + * + * <p> + * Sampling is without replacement; but if the source collection + * contains identical objects, the sample may include repeats. + * </p> + * + * @param rng Generator of uniformly distributed random numbers. + * @param collection List to be sampled from. + * @param k Size of the returned sample. + * @throws IllegalArgumentException if {@code k <= 0} or + * {@code k > collection.size()}. + * @return a shuffled sample from the source collection. + */ + public static <T> List<T> sample(UniformRandomProvider rng, + List<T> collection, + int k) { + final int n = collection.size(); + final PermutationSampler p = new PermutationSampler(rng, n, k); + final List<T> result = new ArrayList<T>(k); + final int[] index = p.sample(); + + for (int i = 0; i < k; i++) { + result.add(collection.get(index[i])); + } + + return result; + } + + /** + * Shuffles the entries of the given array. + * + * @see #shuffle(List,int,boolean,UniformRandomProvider) + * + * @param <T> Type of the list items. 
+ * @param list List whose entries will be shuffled (in-place). + * @param rng Random number generator. + */ + public static <T> void shuffle(List<T> list, + UniformRandomProvider rng) { + shuffle(list, 0, false, rng); + } + + /** + * Shuffles the entries of the given array, using the + * <a href="http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm"> + * Fisher-Yates</a> algorithm. + * The {@code start} and {@code towardHead} parameters select which part + * of the array is randomized and which is left untouched. + * + * @param <T> Type of the list items. + * @param list List whose entries will be shuffled (in-place). + * @param start Index at which shuffling begins. + * @param towardHead Shuffling is performed for index positions between + * {@code start} and either the end (if {@code false}) or the beginning + * (if {@code true}) of the array. + * @param rng Random number generator. + */ + public static <T> void shuffle(List<T> list, + int start, + boolean towardHead, + UniformRandomProvider rng) { + final int len = list.size(); + final int[] indices = PermutationSampler.natural(len); + PermutationSampler.shuffle(indices, start, towardHead, rng); + + final ArrayList<T> items = new ArrayList<T>(list); + for (int i = 0; i < len; i++) { + list.set(i, items.get(indices[i])); + } + } +} http://git-wip-us.apache.org/repos/asf/commons-rng/blob/f7925ec8/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ListSamplerTest.java ---------------------------------------------------------------------- diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ListSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ListSamplerTest.java new file mode 100644 index 0000000..652f8bb --- /dev/null +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ListSamplerTest.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.rng.sampling; + +import java.util.Set; +import java.util.HashSet; +import java.util.List; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Arrays; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.commons.math3.stat.inference.ChiSquareTest; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.simple.RandomSource; + +/** + * Tests for {@link ListSampler}. + */ +public class ListSamplerTest { + private final UniformRandomProvider rng = RandomSource.create(RandomSource.ISAAC, 6543432321L); + private final ChiSquareTest chiSquareTest = new ChiSquareTest(); + + @Test + public void testSample() { + final String[][] c = { { "0", "1" }, { "0", "2" }, { "0", "3" }, { "0", "4" }, + { "1", "2" }, { "1", "3" }, { "1", "4" }, + { "2", "3" }, { "2", "4" }, + { "3", "4" } }; + final long[] observed = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + final double[] expected = { 100, 100, 100, 100, 100, 100, 100, 100, 100, 100 }; + + final HashSet<String> cPop = new HashSet<String>(); // {0, 1, 2, 3, 4}. + for (int i = 0; i < 5; i++) { + cPop.add(Integer.toString(i)); + } + + final List<Set<String>> sets = new ArrayList<Set<String>>(); // 2-sets from 5. 
+ for (int i = 0; i < 10; i++) { + final HashSet<String> hs = new HashSet<String>(); + hs.add(c[i][0]); + hs.add(c[i][1]); + sets.add(hs); + } + + for (int i = 0; i < 1000; i++) { + observed[findSample(sets, ListSampler.sample(rng, new ArrayList<String>(cPop), 2))]++; + } + + // Pass if we cannot reject null hypothesis that distributions are the same. + Assert.assertFalse(chiSquareTest.chiSquareTest(expected, observed, 0.001)); + } + + @Test + public void testSampleWhole() { + // Sample of size = size of collection must return the same collection. + final List<String> list = new ArrayList<String>(); + list.add("one"); + + final List<String> one = ListSampler.sample(rng, list, 1); + Assert.assertEquals(1, one.size()); + Assert.assertTrue(one.contains("one")); + } + + @Test(expected=IllegalArgumentException.class) + public void testSamplePrecondition1() { + // Must fail for sample size > collection size. + final List<String> list = new ArrayList<String>(); + list.add("one"); + ListSampler.sample(rng, list, 2); + } + + @Test(expected=IllegalArgumentException.class) + public void testSamplePrecondition2() { + // Must fail for empty collection. + final List<String> list = new ArrayList<String>(); + ListSampler.sample(rng, list, 1); + } + + @Test + public void testShuffleTail() { + final List<Integer> orig = new ArrayList<Integer>(); + for (int i = 0; i < 10; i++) { + orig.add((i + 1) * rng.nextInt()); + } + final List<Integer> list = new ArrayList<Integer>(orig); + + final int start = 4; + ListSampler.shuffle(list, start, false, rng); + + // Ensure that all entries below index "start" did not move. + for (int i = 0; i < start; i++) { + Assert.assertEquals(orig.get(i), list.get(i)); + } + + // Ensure that at least one entry has moved. 
+ boolean ok = false; + for (int i = start; i < orig.size() - 1; i++) { + if (!orig.get(i).equals(list.get(i))) { + ok = true; + break; + } + } + Assert.assertTrue(ok); + } + + @Test + public void testShuffleHead() { + final List<Integer> orig = new ArrayList<Integer>(); + for (int i = 0; i < 10; i++) { + orig.add((i + 1) * rng.nextInt()); + } + final List<Integer> list = new ArrayList<Integer>(orig); + + final int start = 4; + ListSampler.shuffle(list, start, true, rng); + + // Ensure that all entries above index "start" did not move. + for (int i = start + 1; i < orig.size(); i++) { + Assert.assertEquals(orig.get(i), list.get(i)); + } + + // Ensure that at least one entry has moved. + boolean ok = false; + for (int i = 0; i <= start; i++) { + if (!orig.get(i).equals(list.get(i))) { + ok = true; + break; + } + } + Assert.assertTrue(ok); + } + + //// Support methods. + + private <T extends Set<String>> int findSample(List<T> u, + Collection<String> sampList) { + final String[] samp = sampList.toArray(new String[sampList.size()]); + for (int i = 0; i < u.size(); i++) { + final T set = u.get(i); + final HashSet<String> sampSet = new HashSet<String>(); + for (int j = 0; j < samp.length; j++) { + sampSet.add(samp[j]); + } + if (set.equals(sampSet)) { + return i; + } + } + + Assert.fail("Sample not found: { " + + samp[0] + ", " + samp[1] + " }"); + return -1; + } +}