This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git

commit 88608c23a424ce54fb96b51f1d677eeff35c1607
Author: Alex Herbert <aherb...@apache.org>
AuthorDate: Wed Mar 2 01:03:14 2022 +0000

    RNG-167: Sampling from a t-distribution
---
 .../rng/sampling/distribution/TSampler.java        | 220 +++++++++++++++++++++
 .../distribution/ContinuousSamplersList.java       |  15 ++
 .../rng/sampling/distribution/TSamplerTest.java    |  67 +++++++
 src/changes/changes.xml                            |   3 +
 src/main/resources/pmd/pmd-ruleset.xml             |   6 +
 5 files changed, 311 insertions(+)

diff --git 
a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/TSampler.java
 
b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/TSampler.java
new file mode 100644
index 0000000..2af5cff
--- /dev/null
+++ 
b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/TSampler.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rng.sampling.distribution;
+
+import java.util.function.DoubleUnaryOperator;
+import org.apache.commons.rng.UniformRandomProvider;
+
+/**
+ * Sampling from a T distribution.
+ *
+ * <p>Uses Bailey's algorithm for t-distribution sampling:</p>
+ *
+ * <blockquote>
+ * <pre>
+ * Bailey, R. W. (1994)
+ * "Polar Generation of Random Variates with the t-Distribution."
+ * Mathematics of Computation 62, 779-781.
+ * </pre>
+ * </blockquote>
+ *
+ * <p>Sampling uses {@link UniformRandomProvider#nextLong()}.</p>
+ *
+ * @see <a 
href="https://en.wikipedia.org/wiki/Student%27s_t-distribution";>Student's T 
distribution (wikipedia)</a>
+ * @see <a href="https://doi.org/10.2307/2153537";>Mathematics of Computation, 
62, 779-781</a>
+ * @since 1.5
+ */
+public abstract class TSampler implements SharedStateContinuousSampler {
+    /** Threshold for huge degrees of freedom. Above this value the CDF of the 
t distribution
+     * matches the normal distribution. Value is 2/eps (where eps is the 
machine epsilon)
+     * or approximately 9.0e15.  */
+    private static final double HUGE_DF = 0x1.0p53;
+
+    /** Source of randomness. */
+    private final UniformRandomProvider rng;
+
+    /**
+     * Sample from a t-distribution using Bailey's algorithm.
+     */
+    private static final class StudentsTSampler extends TSampler {
+        /** Threshold for large degrees of freedom. */
+        private static final double LARGE_DF = 25;
+        /** The multiplier to convert the least significant 53-bits of a 
{@code long} to a
+         * uniform {@code double}. */
+        private static final double DOUBLE_MULTIPLIER = 0x1.0p-53;
+
+        /** Degrees of freedom. */
+        private final double df;
+        /** Function to compute pow(x, -2/v) - 1, where v = degrees of 
freedom. */
+        private final DoubleUnaryOperator powm1;
+
+        /**
+         * @param rng Generator of uniformly distributed random numbers.
+         * @param v Degrees of freedom.
+         */
+        StudentsTSampler(UniformRandomProvider rng,
+                         double v) {
+            super(rng);
+            df = v;
+
+            // The sampler requires pow(w, -2/v) - 1 with
+            // 0 <= w <= 1; Expected(w) = sqrt(0.5).
+            // When the exponent is small then pow(x, y) -> 1.
+            // This affects large degrees of freedom.
+            final double exponent = -2 / v;
+            powm1 = v > LARGE_DF ?
+                x -> Math.expm1(Math.log(x) * exponent) :
+                x -> Math.pow(x, exponent) - 1;
+        }
+
+        /**
+         * @param rng Generator of uniformly distributed random numbers.
+         * @param source Source to copy.
+         */
+        private StudentsTSampler(UniformRandomProvider rng,
+                                 StudentsTSampler source) {
+            super(rng);
+            df = source.df;
+            powm1 = source.powm1;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public double sample() {
+            // Require u and v in [0, 1] and a random sign.
+            // Create u in (0, 1] to avoid generating nan
+            // from u*u/w (0/0) or r2*c2 (inf*0).
+            final double u = InternalUtils.makeNonZeroDouble(nextLong());
+            final double v = makeSignedDouble(nextLong());
+            final double w = u * u + v * v;
+            if (w > 1) {
+                // Rejection frequency = 1 - pi/4 = 0.215.
+                // Recursion will generate stack overflow given a broken RNG
+                // and avoids an infinite loop.
+                return sample();
+            }
+            // Sidestep a square-root calculation.
+            final double c2 = u * u / w;
+            final double r2 = df * powm1.applyAsDouble(w);
+            // Choose sign at random from the sign of v.
+            return Math.copySign(Math.sqrt(r2 * c2), v);
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public StudentsTSampler 
withUniformRandomProvider(UniformRandomProvider rng) {
+            return new StudentsTSampler(rng, this);
+        }
+
+        /**
+         * Creates a signed double in the range {@code [-1, 1)}. The magnitude 
is sampled evenly
+         * from the 2<sup>54</sup> dyadic rationals in the range.
+         *
+         * <p>Note: This method will not return samples for both -0.0 and 0.0.
+         *
+         * @param bits the bits
+         * @return the double
+         */
+        private static double makeSignedDouble(long bits) {
+            // As per o.a.c.rng.core.utils.NumberFactory.makeDouble(long) but 
using a signed
+            // shift of 10 in place of an unsigned shift of 11.
+            // Use the upper 54 bits on the assumption they are more random.
+            // The sign bit is maintained by the signed shift.
+            // The next 53 bits generates a magnitude in the range [0, 2^53) 
or [-2^53, 0).
+            return (bits >> 10) * DOUBLE_MULTIPLIER;
+        }
+    }
+
+    /**
+     * Sample from a t-distribution using a normal distribution.
+     * This is used when the degrees of freedom is extremely large (e.g. 
{@code > 1e16}).
+     */
+    private static final class NormalTSampler extends TSampler {
+        /** Underlying normalized Gaussian sampler. */
+        private final NormalizedGaussianSampler sampler;
+
+        /**
+         * @param rng Generator of uniformly distributed random numbers.
+         */
+        NormalTSampler(UniformRandomProvider rng) {
+            super(rng);
+            this.sampler = ZigguratSampler.NormalizedGaussian.of(rng);
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public double sample() {
+            return sampler.sample();
+
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public NormalTSampler withUniformRandomProvider(UniformRandomProvider 
rng) {
+            return new NormalTSampler(rng);
+        }
+    }
+
+    /**
+     * @param rng Generator of uniformly distributed random numbers.
+     */
+    TSampler(UniformRandomProvider rng) {
+        this.rng = rng;
+    }
+
+    /** {@inheritDoc} */
+    // Redeclare the signature to return a TSampler not a 
SharedStateContinuousSampler
+    @Override
+    public abstract TSampler withUniformRandomProvider(UniformRandomProvider 
rng);
+
+    /**
+     * Generates a {@code long} value.
+     * Used by algorithm implementations without exposing access to the RNG.
+     *
+     * @return the next random value
+     */
+    long nextLong() {
+        return rng.nextLong();
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public String toString() {
+        return "Student's t deviate [" + rng.toString() + "]";
+    }
+
+    /**
+     * Create a new t distribution sampler.
+     *
+     * @param rng Generator of uniformly distributed random numbers.
+     * @param degreesOfFreedom Degrees of freedom.
+     * @return the sampler
+     * @throws IllegalArgumentException if {@code degreesOfFreedom <= 0}
+     */
+    public static TSampler of(UniformRandomProvider rng,
+                              double degreesOfFreedom) {
+        if (degreesOfFreedom > HUGE_DF) {
+            return new NormalTSampler(rng);
+        } else if (degreesOfFreedom > 0) {
+            return new StudentsTSampler(rng, degreesOfFreedom);
+        } else {
+            // df <= 0 or nan
+            throw new IllegalArgumentException(
+                "degrees of freedom is not strictly positive: " + 
degreesOfFreedom);
+        }
+    }
+}
diff --git 
a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java
 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java
index b11ccf5..b93cbcc 100644
--- 
a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java
+++ 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/ContinuousSamplersList.java
@@ -254,6 +254,21 @@ public final class ContinuousSamplersList {
             final double dofT = 0.76543;
             add(LIST, new 
org.apache.commons.math3.distribution.TDistribution(unusedRng, dofT),
                 RandomSource.ISAAC.create());
+            // T.
+            add(LIST, new 
org.apache.commons.math3.distribution.TDistribution(unusedRng, dofT),
+                TSampler.of(RandomSource.SFC_64.create(), dofT));
+            // T with 'large' degrees of freedom.
+            final double dofTlarge = 30;
+            add(LIST, new 
org.apache.commons.math3.distribution.TDistribution(unusedRng, dofTlarge),
+                TSampler.of(RandomSource.XO_SHI_RO_256_PP.create(), 
dofTlarge));
+            // T with 'huge' degrees of freedom (approaches a normal 
distribution).
+            // Deciles are computed incorrectly using Commons Math; values 
computed using Matlab.
+            // Note: DF is below the switch to using a normal distribution.
+            final double dofTHuge = 1e15;
+            add(LIST, new double[] {-1.2815515655446015, -0.84162123357291463, 
-0.52440051270804089,
+                -0.25334710313579983, 0, 0.25334710313579983, 
0.52440051270804089, 0.84162123357291474,
+                1.2815515655446015, Double.POSITIVE_INFINITY},
+                TSampler.of(RandomSource.XO_SHI_RO_256_SS.create(), dofTHuge));
 
             // Triangular ("inverse method").
             final double aTriangle = -0.76543;
diff --git 
a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/TSamplerTest.java
 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/TSamplerTest.java
new file mode 100644
index 0000000..2112ab2
--- /dev/null
+++ 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/TSamplerTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rng.sampling.distribution;
+
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.RandomAssert;
+import org.apache.commons.rng.simple.RandomSource;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+/**
+ * Test for the {@link TSampler}.
+ */
+class TSamplerTest {
+    /**
+     * Test the constructor with an invalid degrees of freedom.
+     */
+    @ParameterizedTest
+    @ValueSource(doubles = {0, -1, Double.NaN})
+    void testConstructorThrowsWithBadDegreesOfFreedom(double degreesOfFreedom) 
{
+        final UniformRandomProvider rng = RandomSource.SPLIT_MIX_64.create(0L);
+        Assertions.assertThrows(IllegalArgumentException.class,
+            () -> TSampler.of(rng, degreesOfFreedom));
+    }
+
+    /**
+     * Test the SharedStateSampler implementation.
+     */
+    @ParameterizedTest
+    @ValueSource(doubles = {4.56, 1e16})
+    void testSharedStateSampler(double degreesOfFreedom) {
+        final UniformRandomProvider rng1 = 
RandomSource.SPLIT_MIX_64.create(0L);
+        final UniformRandomProvider rng2 = 
RandomSource.SPLIT_MIX_64.create(0L);
+        final TSampler sampler1 = TSampler.of(rng1, degreesOfFreedom);
+        final TSampler sampler2 = sampler1.withUniformRandomProvider(rng2);
+        RandomAssert.assertProduceSameSequence(sampler1, sampler2);
+    }
+
+    /**
+     * Test extremely large degrees of freedom is approximated using a normal 
distribution.
+     */
+    @Test
+    void testExtremelyLargeDegreesOfFreedom() {
+        final UniformRandomProvider rng1 = 
RandomSource.SPLIT_MIX_64.create(0L);
+        final UniformRandomProvider rng2 = 
RandomSource.SPLIT_MIX_64.create(0L);
+        final double degreesOfFreedom = 1e16;
+        final ContinuousSampler sampler1 = TSampler.of(rng1, degreesOfFreedom);
+        final ContinuousSampler sampler2 = 
ZigguratSampler.NormalizedGaussian.of(rng2);
+        RandomAssert.assertProduceSameSequence(sampler1, sampler2);
+    }
+}
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 89ec5b9..c37cf53 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -81,6 +81,9 @@ re-run tests that fail, and pass the build if they succeed
 within the allotted number of reruns (the test will be marked
 as 'flaky' in the report).
 ">
+      <action dev="aherbert" type="add" issue="167">
+        New "TSampler" class to sample from Student's t-distribution.
+      </action>
       <action dev="aherbert" type="fix" issue="166">
         Update "LogNormalSampler" and "BoxMullerLogNormalSampler" to allow a 
negative mean for
         the natural logarithm of the distribution values.
diff --git a/src/main/resources/pmd/pmd-ruleset.xml 
b/src/main/resources/pmd/pmd-ruleset.xml
index 0a6775a..6369c2f 100644
--- a/src/main/resources/pmd/pmd-ruleset.xml
+++ b/src/main/resources/pmd/pmd-ruleset.xml
@@ -95,6 +95,12 @@
         or @SimpleName='ExamplesSamplingCommand' or 
@SimpleName='UniformSamplingVisualCheckCommand']"/>
     </properties>
   </rule>
+  <rule ref="category/java/bestpractices.xml/AccessorClassGeneration">
+    <properties>
+      <!-- False positive -->
+      <property name="violationSuppressXPath" 
value="//ClassOrInterfaceDeclaration[@SimpleName='TSampler']"/>
+    </properties>
+  </rule>
 
   <rule ref="category/java/codestyle.xml/ClassNamingConventions">
     <properties>

Reply via email to