API change. "CollectionSampler" now provides a simple API to randomly select a single element from a collection.
Project: http://git-wip-us.apache.org/repos/asf/commons-rng/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-rng/commit/5cd88b10 Tree: http://git-wip-us.apache.org/repos/asf/commons-rng/tree/5cd88b10 Diff: http://git-wip-us.apache.org/repos/asf/commons-rng/diff/5cd88b10 Branch: refs/heads/master Commit: 5cd88b10dcc4f05c1d70ef49c43f15bc2b518f3a Parents: 144f88b Author: Gilles <er...@apache.org> Authored: Mon Nov 21 14:53:30 2016 +0100 Committer: Gilles <er...@apache.org> Committed: Mon Nov 21 14:53:30 2016 +0100 ---------------------------------------------------------------------- .../commons/rng/sampling/CollectionSampler.java | 84 ++-------- .../rng/sampling/CollectionSamplerTest.java | 153 +++---------------- 2 files changed, 30 insertions(+), 207 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-rng/blob/5cd88b10/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/CollectionSampler.java ---------------------------------------------------------------------- diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/CollectionSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/CollectionSampler.java index 222c8a5..740fa21 100644 --- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/CollectionSampler.java +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/CollectionSampler.java @@ -28,45 +28,34 @@ import org.apache.commons.rng.UniformRandomProvider; * * @param <T> Type of items in the collection. * - * This class also contains utilities for shuffling a generic {@link List}. - * * @since 1.0 */ public class CollectionSampler<T> { /** Collection to be sampled from. */ private final ArrayList<T> items; - /** Permutation. */ - private final PermutationSampler permutation; - /** Size of returned list. */ - private final int size; + /** RNG. */ + private final UniformRandomProvider rng; /** * Creates a sampler. * - * The {@link #sample()} method will generate a collection of - * size {@code k} whose entries are selected randomly, without - * repetition, from the items in the given {@code collection}. - * * @param rng Generator of uniformly distributed random numbers. * @param collection Collection to be sampled. * A (shallow) copy will be stored in the created instance. - * @param k Size of the permutation. - * @throws IllegalArgumentException if {@code k <= 0} or - * {@code k > collection.size()}. + * @throws IllegalArgumentException if {@code collection.size() <= 0}. */ public CollectionSampler(UniformRandomProvider rng, - Collection<T> collection, - int k) { - permutation = new PermutationSampler(rng, collection.size(), k); + Collection<T> collection) { + if (collection.size() <= 0) { + throw new IllegalArgumentException("Empty collection"); + } + + this.rng = rng; items = new ArrayList<T>(collection); - size = k; } /** - * Creates a list of objects selected randomly, without repetition, - * from the collection provided at - * {@link #CollectionSampler(UniformRandomProvider,Collection,int) - * construction}. + * Picks one of the items in the given {@code collection}. * * <p> * Sampling is without replacement; but if the source collection @@ -79,56 +68,7 @@ public class CollectionSampler<T> { * * @return a random sample. */ - public Collection<T> sample() { - final ArrayList<T> result = new ArrayList<T>(size); - final int[] index = permutation.sample(); - - for (int i = 0; i < size; i++) { - result.add(items.get(index[i])); - } - - return result; - } - /** - * Shuffles the entries of the given array. - * - * @see #shuffle(List,int,boolean,UniformRandomProvider) - * - * @param <S> Type of the list items. - * @param list List whose entries will be shuffled (in-place). - * @param rng Random number generator. - */ - public static <S> void shuffle(List<S> list, - UniformRandomProvider rng) { - shuffle(list, 0, false, rng); - } - - /** - * Shuffles the entries of the given array, using the - * <a href="http://en.wikipedia.org/wiki/FisherâYates_shuffle#The_modern_algorithm"> - * Fisher-Yates</a> algorithm. - * The {@code start} and {@code pos} parameters select which part - * of the array is randomized and which is left untouched. - * - * @param <S> Type of the list items. - * @param list List whose entries will be shuffled (in-place). - * @param start Index at which shuffling begins. - * @param towardHead Shuffling is performed for index positions between - * {@code start} and either the end (if {@code false}) or the beginning - * (if {@code true}) of the array. - * @param rng Random number generator. - */ - public static <S> void shuffle(List<S> list, - int start, - boolean towardHead, - UniformRandomProvider rng) { - final int len = list.size(); - final int[] indices = PermutationSampler.natural(len); - PermutationSampler.shuffle(indices, start, towardHead, rng); - - final ArrayList<S> items = new ArrayList<S>(list); - for (int i = 0; i < len; i++) { - list.set(i, items.get(indices[i])); - } + public T sample() { + return items.get(rng.nextInt(items.size())); } } http://git-wip-us.apache.org/repos/asf/commons-rng/blob/5cd88b10/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/CollectionSamplerTest.java ---------------------------------------------------------------------- diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/CollectionSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/CollectionSamplerTest.java index 3bd897c..31ed081 100644 --- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/CollectionSamplerTest.java +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/CollectionSamplerTest.java @@ -16,158 +16,41 @@ */ package org.apache.commons.rng.sampling; -import java.util.Set; -import java.util.HashSet; -import java.util.List; import java.util.ArrayList; -import java.util.Collection; -import java.util.Arrays; import org.junit.Assert; import org.junit.Test; -import org.apache.commons.math3.stat.inference.ChiSquareTest; - -import org.apache.commons.rng.UniformRandomProvider; import org.apache.commons.rng.simple.RandomSource; /** * Tests for {@link CollectionSampler}. */ public class CollectionSamplerTest { - private final UniformRandomProvider rng = RandomSource.create(RandomSource.ISAAC, 6543432321L); - private final ChiSquareTest chiSquareTest = new ChiSquareTest(); @Test - public void testSample() { - final String[][] c = { { "0", "1" }, { "0", "2" }, { "0", "3" }, { "0", "4" }, - { "1", "2" }, { "1", "3" }, { "1", "4" }, - { "2", "3" }, { "2", "4" }, - { "3", "4" } }; - final long[] observed = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - final double[] expected = { 100, 100, 100, 100, 100, 100, 100, 100, 100, 100 }; - - final HashSet<String> cPop = new HashSet<String>(); // {0, 1, 2, 3, 4}. - for (int i = 0; i < 5; i++) { - cPop.add(Integer.toString(i)); - } - - final List<Set<String>> sets = new ArrayList<Set<String>>(); // 2-sets from 5. - for (int i = 0; i < 10; i++) { - final HashSet<String> hs = new HashSet<String>(); - hs.add(c[i][0]); - hs.add(c[i][1]); - sets.add(hs); - } - - final CollectionSampler<String> sampler = new CollectionSampler<String>(rng, cPop, 2); - for (int i = 0; i < 1000; i++) { - observed[findSample(sets, sampler.sample())]++; + public void testSampleTrivial() { + final ArrayList<String> list = new ArrayList<String>(); + list.add("Apache"); + list.add("Commons"); + list.add("RNG"); + + final CollectionSampler<String> sampler = + new CollectionSampler<String>(RandomSource.create(RandomSource.MWC_256), + list); + final String word = sampler.sample(); + for (String w : list) { + if (word.equals(w)) { + return; + } } - - // Pass if we cannot reject null hypothesis that distributions are the same - Assert.assertFalse(chiSquareTest.chiSquareTest(expected, observed, 0.001)); - } - - @Test - public void testSampleWhole() { - // Sample of size = size of collection must return the same collection. - final HashSet<String> hs = new HashSet<String>(); - hs.add("one"); - - final CollectionSampler<String> sampler = new CollectionSampler<String>(rng, hs, 1); - final Collection<String> one = sampler.sample(); - Assert.assertEquals(1, one.size()); - Assert.assertTrue(one.contains("one")); + Assert.fail(word + " not in list"); } @Test(expected=IllegalArgumentException.class) - public void testSamplePrecondition1() { - // Must fail for sample size > collection size. - final HashSet<String> hs = new HashSet<String>(); - hs.add("one"); - new CollectionSampler<String>(rng, hs, 2).sample(); - } - - @Test(expected=IllegalArgumentException.class) - public void testSamplePrecondition2() { + public void testSamplePrecondition() { // Must fail for empty collection. - final HashSet<String> hs = new HashSet<String>(); - new CollectionSampler<String>(rng, hs, 0).sample(); - } - - @Test - public void testShuffleTail() { - final List<Integer> orig = new ArrayList<Integer>(); - for (int i = 0; i < 10; i++) { - orig.add((i + 1) * rng.nextInt()); - } - final List<Integer> list = new ArrayList<Integer>(orig); - - final int start = 4; - CollectionSampler.shuffle(list, start, false, rng); - - // Ensure that all entries below index "start" did not move. - for (int i = 0; i < start; i++) { - Assert.assertEquals(orig.get(i), list.get(i)); - } - - // Ensure that at least one entry has moved. - boolean ok = false; - for (int i = start; i < orig.size() - 1; i++) { - if (!orig.get(i).equals(list.get(i))) { - ok = true; - break; - } - } - Assert.assertTrue(ok); - } - - @Test - public void testShuffleHead() { - final List<Integer> orig = new ArrayList<Integer>(); - for (int i = 0; i < 10; i++) { - orig.add((i + 1) * rng.nextInt()); - } - final List<Integer> list = new ArrayList<Integer>(orig); - - final int start = 4; - CollectionSampler.shuffle(list, start, true, rng); - - // Ensure that all entries above index "start" did not move. - for (int i = start + 1; i < orig.size(); i++) { - Assert.assertEquals(orig.get(i), list.get(i)); - } - - // Ensure that at least one entry has moved. - boolean ok = false; - for (int i = 0; i <= start; i++) { - if (!orig.get(i).equals(list.get(i))) { - ok = true; - break; - } - } - Assert.assertTrue(ok); - } - - //// Support methods. - - private <T extends Set<String>> int findSample(List<T> u, - Collection<String> sampList) { - final String[] samp = sampList.toArray(new String[sampList.size()]); - for (int i = 0; i < u.size(); i++) { - final T set = u.get(i); - final HashSet<String> sampSet = new HashSet<String>(); - for (int j = 0; j < samp.length; j++) { - sampSet.add(samp[j]); - } - if (set.equals(sampSet)) { - return i; - } - } - - Assert.fail("Sample not found: { " + - samp[0] + ", " + samp[1] + " }"); - return -1; + new CollectionSampler<String>(RandomSource.create(RandomSource.MT), + new ArrayList<String>()); } }