This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-rng.git
commit 88608c23a424ce54fb96b51f1d677eeff35c1607 Author: Alex Herbert <aherb...@apache.org> AuthorDate: Wed Mar 2 01:03:14 2022 +0000 RNG-167: Sampling from a t-distribution --- .../rng/sampling/distribution/TSampler.java | 220 +++++++++++++++++++++ .../distribution/ContinuousSamplersList.java | 15 ++ .../rng/sampling/distribution/TSamplerTest.java | 67 +++++++ src/changes/changes.xml | 3 + src/main/resources/pmd/pmd-ruleset.xml | 6 + 5 files changed, 311 insertions(+) diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/TSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/TSampler.java new file mode 100644 index 0000000..2af5cff --- /dev/null +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/TSampler.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.rng.sampling.distribution; + +import java.util.function.DoubleUnaryOperator; +import org.apache.commons.rng.UniformRandomProvider; + +/** + * Sampling from a T distribution. + * + * <p>Uses Bailey's algorithm for t-distribution sampling:</p> + * + * <blockquote> + * <pre> + * Bailey, R. W. (1994) + * "Polar Generation of Random Variates with the t-Distribution." + * Mathematics of Computation 62, 779-781. + * </pre> + * </blockquote> + * + * <p>Sampling uses {@link UniformRandomProvider#nextLong()}.</p> + * + * @see <a href="https://en.wikipedia.org/wiki/Student%27s_t-distribution">Student's T distribution (wikipedia)</a> + * @see <a href="https://doi.org/10.2307/2153537">Mathematics of Computation, 62, 779-781</a> + * @since 1.5 + */ +public abstract class TSampler implements SharedStateContinuousSampler { + /** Threshold for huge degrees of freedom. Above this value the CDF of the t distribution + * matches the normal distribution. Value is 2/eps (where eps is the machine epsilon) + * or approximately 9.0e15. */ + private static final double HUGE_DF = 0x1.0p53; + + /** Source of randomness. */ + private final UniformRandomProvider rng; + + /** + * Sample from a t-distribution using Bailey's algorithm. + */ + private static final class StudentsTSampler extends TSampler { + /** Threshold for large degrees of freedom. */ + private static final double LARGE_DF = 25; + /** The multiplier to convert the least significant 53-bits of a {@code long} to a + * uniform {@code double}. */ + private static final double DOUBLE_MULTIPLIER = 0x1.0p-53; + + /** Degrees of freedom. */ + private final double df; + /** Function to compute pow(x, -2/v) - 1, where v = degrees of freedom. */ + private final DoubleUnaryOperator powm1; + + /** + * @param rng Generator of uniformly distributed random numbers. + * @param v Degrees of freedom. + */ + StudentsTSampler(UniformRandomProvider rng, + double v) { + super(rng); + df = v; + + // The sampler requires pow(w, -2/v) - 1 with + // 0 <= w <= 1; Expected(w) = sqrt(0.5). + // When the exponent is small then pow(x, y) -> 1. + // This affects large degrees of freedom. + final double exponent = -2 / v; + powm1 = v > LARGE_DF ? + x -> Math.expm1(Math.log(x) * exponent) : + x -> Math.pow(x, exponent) - 1; + } + + /** + * @param rng Generator of uniformly distributed random numbers. + * @param source Source to copy. + */ + private StudentsTSampler(UniformRandomProvider rng, + StudentsTSampler source) { + super(rng); + df = source.df; + powm1 = source.powm1; + } + + /** {@inheritDoc} */ + @Override + public double sample() { + // Require u and v in [0, 1] and a random sign. + // Create u in (0, 1] to avoid generating nan + // from u*u/w (0/0) or r2*c2 (inf*0). + final double u = InternalUtils.makeNonZeroDouble(nextLong()); + final double v = makeSignedDouble(nextLong()); + final double w = u * u + v * v; + if (w > 1) { + // Rejection frequency = 1 - pi/4 = 0.215. + // Recursion will generate stack overflow given a broken RNG + // and avoids an infinite loop. + return sample(); + } + // Sidestep a square-root calculation. + final double c2 = u * u / w; + final double r2 = df * powm1.applyAsDouble(w); + // Choose sign at random from the sign of v. + return Math.copySign(Math.sqrt(r2 * c2), v); + } + + /** {@inheritDoc} */ + @Override + public StudentsTSampler withUniformRandomProvider(UniformRandomProvider rng) { + return new StudentsTSampler(rng, this); + } + + /** + * Creates a signed double in the range {@code [-1, 1)}. The magnitude is sampled evenly + * from the 2<sup>54</sup> dyadic rationals in the range. + * + * <p>Note: This method will not return samples for both -0.0 and 0.0. + * + * @param bits the bits + * @return the double + */ + private static double makeSignedDouble(long bits) { + // As per o.a.c.rng.core.utils.NumberFactory.makeDouble(long) but using a signed + // shift of 10 in place of an unsigned shift of 11. + // Use the upper 54 bits on the assumption they are more random. + // The sign bit is maintained by the signed shift. + // The next 53 bits generates a magnitude in the range [0, 2^53) or [-2^53, 0). + return (bits >> 10) * DOUBLE_MULTIPLIER; + } + } + + /** + * Sample from a t-distribution using a normal distribution. + * This is used when the degrees of freedom is extremely large (e.g. {@code > 1e16}). + */ + private static final class NormalTSampler extends TSampler { + /** Underlying normalized Gaussian sampler. */ + private final NormalizedGaussianSampler sampler; + + /** + * @param rng Generator of uniformly distributed random numbers. + */ + NormalTSampler(UniformRandomProvider rng) { + super(rng); + this.sampler = ZigguratSampler.NormalizedGaussian.of(rng); + } + + /** {@inheritDoc} */ + @Override + public double sample() { + return sampler.sample(); + + } + + /** {@inheritDoc} */ + @Override + public NormalTSampler withUniformRandomProvider(UniformRandomProvider rng) { + return new NormalTSampler(rng); + } + } + + /** + * @param rng Generator of uniformly distributed random numbers. + */ + TSampler(UniformRandomProvider rng) { + this.rng = rng; + } + + /** {@inheritDoc} */ + // Redeclare the signature to return a TSampler not a SharedStateContinuousSampler + @Override + public abstract TSampler withUniformRandomProvider(UniformRandomProvider rng); + + /** + * Generates a {@code long} value. + * Used by algorithm implementations without exposing access to the RNG. + * + * @return the next random value + */ + long nextLong() { + return rng.nextLong(); + } + + /** {@inheritDoc} */ + @Override + public String toString() { + return "Student's t deviate [" + rng.toString() + "]"; + } + + /** + * Create a new t distribution sampler. + * + * @param rng Generator of uniformly distributed random numbers. + * @param degreesOfFreedom Degrees of freedom. + * @return the sampler + * @throws IllegalArgumentException if {@code degreesOfFreedom <= 0} + */ + public static TSampler of(UniformRandomProvider rng, + double degreesOfFreedom) { + if (degreesOfFreedom > HUGE_DF) { + return new NormalTSampler(rng); + } else if (degreesOfFreedom > 0) { + return new StudentsTSampler(rng, degreesOfFreedom); + } else { + // df <= 0 or nan + throw new IllegalArgumentException( + "degrees of freedom is not strictly positive: " + degreesOfFreedom); + } + } +} diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java index b11ccf5..b93cbcc 100644 --- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java @@ -254,6 +254,21 @@ public final class ContinuousSamplersList { final double dofT = 0.76543; add(LIST, new org.apache.commons.math3.distribution.TDistribution(unusedRng, dofT), RandomSource.ISAAC.create()); + // T. + add(LIST, new org.apache.commons.math3.distribution.TDistribution(unusedRng, dofT), + TSampler.of(RandomSource.SFC_64.create(), dofT)); + // T with 'large' degrees of freedom. + final double dofTlarge = 30; + add(LIST, new org.apache.commons.math3.distribution.TDistribution(unusedRng, dofTlarge), + TSampler.of(RandomSource.XO_SHI_RO_256_PP.create(), dofTlarge)); + // T with 'huge' degrees of freedom (approaches a normal distribution). + // Deciles are computed incorrectly using Commons Math; values computed using Matlab. + // Note: DF is below the switch to using a normal distribution. + final double dofTHuge = 1e15; + add(LIST, new double[] {-1.2815515655446015, -0.84162123357291463, -0.52440051270804089, + -0.25334710313579983, 0, 0.25334710313579983, 0.52440051270804089, 0.84162123357291474, + 1.2815515655446015, Double.POSITIVE_INFINITY}, + TSampler.of(RandomSource.XO_SHI_RO_256_SS.create(), dofTHuge)); // Triangular ("inverse method"). final double aTriangle = -0.76543; diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/TSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/TSamplerTest.java new file mode 100644 index 0000000..2112ab2 --- /dev/null +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/TSamplerTest.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.rng.sampling.distribution; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.sampling.RandomAssert; +import org.apache.commons.rng.simple.RandomSource; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +/** + * Test for the {@link TSampler}. + */ +class TSamplerTest { + /** + * Test the constructor with an invalid degrees of freedom. + */ + @ParameterizedTest + @ValueSource(doubles = {0, -1, Double.NaN}) + void testConstructorThrowsWithBadDegreesOfFreedom(double degreesOfFreedom) { + final UniformRandomProvider rng = RandomSource.SPLIT_MIX_64.create(0L); + Assertions.assertThrows(IllegalArgumentException.class, + () -> TSampler.of(rng, degreesOfFreedom)); + } + + /** + * Test the SharedStateSampler implementation. + */ + @ParameterizedTest + @ValueSource(doubles = {4.56, 1e16}) + void testSharedStateSampler(double degreesOfFreedom) { + final UniformRandomProvider rng1 = RandomSource.SPLIT_MIX_64.create(0L); + final UniformRandomProvider rng2 = RandomSource.SPLIT_MIX_64.create(0L); + final TSampler sampler1 = TSampler.of(rng1, degreesOfFreedom); + final TSampler sampler2 = sampler1.withUniformRandomProvider(rng2); + RandomAssert.assertProduceSameSequence(sampler1, sampler2); + } + + /** + * Test extremely large degrees of freedom is approximated using a normal distribution. + */ + @Test + void testExtremelyLargeDegreesOfFreedom() { + final UniformRandomProvider rng1 = RandomSource.SPLIT_MIX_64.create(0L); + final UniformRandomProvider rng2 = RandomSource.SPLIT_MIX_64.create(0L); + final double degreesOfFreedom = 1e16; + final ContinuousSampler sampler1 = TSampler.of(rng1, degreesOfFreedom); + final ContinuousSampler sampler2 = ZigguratSampler.NormalizedGaussian.of(rng2); + RandomAssert.assertProduceSameSequence(sampler1, sampler2); + } +} diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 89ec5b9..c37cf53 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -81,6 +81,9 @@ re-run tests that fail, and pass the build if they succeed within the allotted number of reruns (the test will be marked as 'flaky' in the report). "> + <action dev="aherbert" type="add" issue="167"> + New "TSampler" class to sample from Student's t-distribution. + </action> <action dev="aherbert" type="fix" issue="166"> Update "LogNormalSampler" and "BoxMullerLogNormalSampler" to allow a negative mean for the natural logarithm of the distribution values. diff --git a/src/main/resources/pmd/pmd-ruleset.xml b/src/main/resources/pmd/pmd-ruleset.xml index 0a6775a..6369c2f 100644 --- a/src/main/resources/pmd/pmd-ruleset.xml +++ b/src/main/resources/pmd/pmd-ruleset.xml @@ -95,6 +95,12 @@ or @SimpleName='ExamplesSamplingCommand' or @SimpleName='UniformSamplingVisualCheckCommand']"/> </properties> </rule> + <rule ref="category/java/bestpractices.xml/AccessorClassGeneration"> + <properties> + <!-- False positive --> + <property name="violationSuppressXPath" value="//ClassOrInterfaceDeclaration[@SimpleName='TSampler']"/> + </properties> + </rule> <rule ref="category/java/codestyle.xml/ClassNamingConventions"> <properties>