Repository: commons-statistics Updated Branches: refs/heads/master 30d7c8f6b -> 05626d011
http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/05626d01/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TriangularDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TriangularDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TriangularDistribution.java new file mode 100644 index 0000000..6a94b62 --- /dev/null +++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TriangularDistribution.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.statistics.distribution; + +/** + * Implementation of the triangular real distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Triangular_distribution"> + * Triangular distribution (Wikipedia)</a> + * + * @since 3.0 + */ +public class TriangularDistribution extends AbstractContinuousDistribution { + /** Serializable version identifier. */ + private static final long serialVersionUID = 20160311L; + /** Lower limit of this distribution (inclusive). */ + private final double a; + /** Upper limit of this distribution (inclusive). */ + private final double b; + /** Mode of this distribution. */ + private final double c; + + /** + * Creates a distribution. + * + * @param a Lower limit of this distribution (inclusive). + * @param b Upper limit of this distribution (inclusive). + * @param c Mode of this distribution. + * @throws IllegalArgumentException if {@code a >= b}, if {@code c > b} + * or if {@code c < a}. + */ + public TriangularDistribution(double a, + double c, + double b) { + if (a >= b) { + throw new DistributionException(DistributionException.TOO_LARGE, + a, b); + } + if (c < a) { + throw new DistributionException(DistributionException.TOO_SMALL, + c, a); + } + if (c > b) { + throw new DistributionException(DistributionException.TOO_LARGE, + c, b); + } + + this.a = a; + this.c = c; + this.b = b; + } + + /** + * Gets the mode. + * + * @return the mode of the distribution. + */ + public double getMode() { + return c; + } + + /** + * {@inheritDoc} + * + * For lower limit {@code a}, upper limit {@code b} and mode {@code c}, the + * PDF is given by + * <ul> + * <li>{@code 2 * (x - a) / [(b - a) * (c - a)]} if {@code a <= x < c},</li> + * <li>{@code 2 / (b - a)} if {@code x = c},</li> + * <li>{@code 2 * (b - x) / [(b - a) * (b - c)]} if {@code c < x <= b},</li> + * <li>{@code 0} otherwise. + * </ul> + */ + @Override + public double density(double x) { + if (x < a) { + return 0; + } + if (a <= x && x < c) { + double divident = 2 * (x - a); + double divisor = (b - a) * (c - a); + return divident / divisor; + } + if (x == c) { + return 2 / (b - a); + } + if (c < x && x <= b) { + double divident = 2 * (b - x); + double divisor = (b - a) * (b - c); + return divident / divisor; + } + return 0; + } + + /** + * {@inheritDoc} + * + * For lower limit {@code a}, upper limit {@code b} and mode {@code c}, the + * CDF is given by + * <ul> + * <li>{@code 0} if {@code x < a},</li> + * <li>{@code (x - a)^2 / [(b - a) * (c - a)]} if {@code a <= x < c},</li> + * <li>{@code (c - a) / (b - a)} if {@code x = c},</li> + * <li>{@code 1 - (b - x)^2 / [(b - a) * (b - c)]} if {@code c < x <= b},</li> + * <li>{@code 1} if {@code x > b}.</li> + * </ul> + */ + @Override + public double cumulativeProbability(double x) { + if (x < a) { + return 0; + } + if (a <= x && x < c) { + double divident = (x - a) * (x - a); + double divisor = (b - a) * (c - a); + return divident / divisor; + } + if (x == c) { + return (c - a) / (b - a); + } + if (c < x && x <= b) { + double divident = (b - x) * (b - x); + double divisor = (b - a) * (b - c); + return 1 - (divident / divisor); + } + return 1; + } + + /** + * {@inheritDoc} + * + * For lower limit {@code a}, upper limit {@code b}, and mode {@code c}, + * the mean is {@code (a + b + c) / 3}. + */ + @Override + public double getNumericalMean() { + return (a + b + c) / 3; + } + + /** + * {@inheritDoc} + * + * For lower limit {@code a}, upper limit {@code b}, and mode {@code c}, + * the variance is {@code (a^2 + b^2 + c^2 - a * b - a * c - b * c) / 18}. + */ + @Override + public double getNumericalVariance() { + return (a * a + b * b + c * c - a * b - a * c - b * c) / 18; + } + + /** + * {@inheritDoc} + * + * The lower bound of the support is equal to the lower limit parameter + * {@code a} of the distribution. + * + * @return lower bound of the support + */ + @Override + public double getSupportLowerBound() { + return a; + } + + /** + * {@inheritDoc} + * + * The upper bound of the support is equal to the upper limit parameter + * {@code b} of the distribution. + * + * @return upper bound of the support + */ + @Override + public double getSupportUpperBound() { + return b; + } + + /** + * {@inheritDoc} + * + * The support of this distribution is connected. + * + * @return {@code true} + */ + @Override + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(double p) { + if (p < 0 || + p > 1) { + throw new DistributionException(DistributionException.OUT_OF_RANGE, p, 0, 1); + } + if (p == 0) { + return a; + } + if (p == 1) { + return b; + } + if (p < (c - a) / (b - a)) { + return a + Math.sqrt(p * (b - a) * (c - a)); + } + return b - Math.sqrt((1 - p) * (b - a) * (b - c)); + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/05626d01/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/UniformContinuousDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/UniformContinuousDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/UniformContinuousDistribution.java new file mode 100644 index 0000000..2bb9a0b --- /dev/null +++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/UniformContinuousDistribution.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.statistics.distribution; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.sampling.distribution.ContinuousSampler; +import org.apache.commons.rng.sampling.distribution.ContinuousUniformSampler; + +/** + * Implementation of the <a href="http://en.wikipedia.org/wiki/Uniform_distribution_(continuous)">uniform distribution</a>. + */ +public class UniformContinuousDistribution extends AbstractContinuousDistribution { + private final double lower; + /** Upper bound of this distribution (exclusive). */ + private final double upper; + + /** + * Create a standard uniform real distribution with lower bound (inclusive) + * equal to zero and upper bound (exclusive) equal to one. + */ + public UniformContinuousDistribution() { + this(0, 1); + } + + /** + * Creates a uniform distribution. + * + * @param lower Lower bound of this distribution (inclusive). + * @param upper Upper bound of this distribution (exclusive). + * @throws IllegalArgumentException if {@code lower >= upper}. + */ + public UniformContinuousDistribution(double lower, + double upper) { + if (lower >= upper) { + throw new DistributionException(DistributionException.TOO_LARGE, + lower, upper); + } + + this.lower = lower; + this.upper = upper; + } + + /** {@inheritDoc} */ + @Override + public double density(double x) { + if (x < lower || + x > upper) { + return 0; + } + return 1 / (upper - lower); + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(double x) { + if (x <= lower) { + return 0; + } + if (x >= upper) { + return 1; + } + return (x - lower) / (upper - lower); + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(final double p) { + if (p < 0 || + p > 1) { + throw new DistributionException(DistributionException.OUT_OF_RANGE, p, 0, 1); + } + return p * (upper - lower) + lower; + } + + /** + * {@inheritDoc} + * + * For lower bound {@code lower} and upper bound {@code upper}, the mean is + * {@code 0.5 * (lower + upper)}. + */ + @Override + public double getNumericalMean() { + return 0.5 * (lower + upper); + } + + /** + * {@inheritDoc} + * + * For lower bound {@code lower} and upper bound {@code upper}, the + * variance is {@code (upper - lower)^2 / 12}. + */ + @Override + public double getNumericalVariance() { + double ul = upper - lower; + return ul * ul / 12; + } + + /** + * {@inheritDoc} + * + * The lower bound of the support is equal to the lower bound parameter + * of the distribution. + * + * @return lower bound of the support + */ + @Override + public double getSupportLowerBound() { + return lower; + } + + /** + * {@inheritDoc} + * + * The upper bound of the support is equal to the upper bound parameter + * of the distribution. + * + * @return upper bound of the support + */ + @Override + public double getSupportUpperBound() { + return upper; + } + + /** + * {@inheritDoc} + * + * The support of this distribution is connected. + * + * @return {@code true} + */ + @Override + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) { + return new ContinuousDistribution.Sampler() { + /** + * Uniform distribution sampler. + */ + private final ContinuousSampler sampler = + new ContinuousUniformSampler(rng, lower, upper); + + /**{@inheritDoc} */ + @Override + public double sample() { + return sampler.sample(); + } + }; + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/05626d01/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/UniformDiscreteDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/UniformDiscreteDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/UniformDiscreteDistribution.java new file mode 100644 index 0000000..41df2bd --- /dev/null +++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/UniformDiscreteDistribution.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.statistics.distribution; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.sampling.distribution.DiscreteSampler; +import org.apache.commons.rng.sampling.distribution.DiscreteUniformSampler; + +/** + * Implementation of the <a href="http://en.wikipedia.org/wiki/Uniform_distribution_(discrete)"> + * uniform integer distribution</a>. + */ +public class UniformDiscreteDistribution extends AbstractDiscreteDistribution { + /** 1 / 12 **/ + private static final double ONE_TWELFTH = 1 / 12d; + /** Lower bound (inclusive) of this distribution. */ + private final int lower; + /** Upper bound (inclusive) of this distribution. */ + private final int upper; + /** "upper" + "lower" (to avoid overflow). */ + private final double upperPlusLower; + /** "upper" - "lower" (to avoid overflow). */ + private final double upperMinusLower; + + /** + * Creates a new uniform integer distribution using the given lower and + * upper bounds (both inclusive). + * + * @param lower Lower bound (inclusive) of this distribution. + * @param upper Upper bound (inclusive) of this distribution. + * @throws IllegalArgumentException if {@code lower > upper}. + */ + public UniformDiscreteDistribution(int lower, + int upper) { + if (lower > upper) { + throw new DistributionException(DistributionException.TOO_LARGE, + lower, upper); + } + this.lower = lower; + this.upper = upper; + upperPlusLower = (double) upper + (double) lower; + upperMinusLower = (double) upper - (double) lower; + } + + /** {@inheritDoc} */ + @Override + public double probability(int x) { + if (x < lower || x > upper) { + return 0; + } + return 1 / (upperMinusLower + 1); + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(int x) { + if (x < lower) { + return 0; + } + if (x > upper) { + return 1; + } + return (x - lower + 1) / (upperMinusLower + 1); + } + + /** + * {@inheritDoc} + * + * For lower bound {@code lower} and upper bound {@code upper}, the mean is + * {@code 0.5 * (lower + upper)}. + */ + @Override + public double getNumericalMean() { + return 0.5 * upperPlusLower; + } + + /** + * {@inheritDoc} + * + * For lower bound {@code lower} and upper bound {@code upper}, and + * {@code n = upper - lower + 1}, the variance is {@code (n^2 - 1) / 12}. + */ + @Override + public double getNumericalVariance() { + double n = upperMinusLower + 1; + return ONE_TWELFTH * (n * n - 1); + } + + /** + * {@inheritDoc} + * + * The lower bound of the support is equal to the lower bound parameter + * of the distribution. + * + * @return lower bound of the support + */ + @Override + public int getSupportLowerBound() { + return lower; + } + + /** + * {@inheritDoc} + * + * The upper bound of the support is equal to the upper bound parameter + * of the distribution. + * + * @return upper bound of the support + */ + @Override + public int getSupportUpperBound() { + return upper; + } + + /** + * {@inheritDoc} + * + * The support of this distribution is connected. + * + * @return {@code true} + */ + @Override + public boolean isSupportConnected() { + return true; + } + + /**{@inheritDoc} */ + @Override + public DiscreteDistribution.Sampler createSampler(final UniformRandomProvider rng) { + return new DiscreteDistribution.Sampler() { + /** + * Discrete uniform distribution sampler. + */ + private final DiscreteSampler sampler = + new DiscreteUniformSampler(rng, lower, upper); + + /**{@inheritDoc} */ + @Override + public int sample() { + return sampler.sample(); + } + }; + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/05626d01/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/WeibullDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/WeibullDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/WeibullDistribution.java new file mode 100644 index 0000000..0e396d8 --- /dev/null +++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/WeibullDistribution.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.statistics.distribution; + +import org.apache.commons.numbers.gamma.LogGamma; + +/** + * Implementation of the Weibull distribution. This implementation uses the + * two parameter form of the distribution defined by + * <a href="http://mathworld.wolfram.com/WeibullDistribution.html"> + * Weibull Distribution</a>, equations (1) and (2). + * + * @see <a href="http://en.wikipedia.org/wiki/Weibull_distribution">Weibull distribution (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/WeibullDistribution.html">Weibull distribution (MathWorld)</a> + * + * @since 1.1 + */ +public class WeibullDistribution extends AbstractContinuousDistribution { + /** The shape parameter. */ + private final double shape; + /** The scale parameter. */ + private final double scale; + + /** + * Creates a distribution. + * + * @param alpha Shape parameter. + * @param beta Scale parameter. + * @throws IllegalArgumentException if {@code alpha <= 0} or {@code beta <= 0}. + */ + public WeibullDistribution(double alpha, + double beta) { + if (alpha <= 0) { + throw new DistributionException(DistributionException.NEGATIVE, + alpha); + } + if (beta <= 0) { + throw new DistributionException(DistributionException.NEGATIVE, + beta); + } + scale = beta; + shape = alpha; + } + + /** + * Access the shape parameter, {@code alpha}. + * + * @return the shape parameter, {@code alpha}. + */ + public double getShape() { + return shape; + } + + /** + * Access the scale parameter, {@code beta}. + * + * @return the scale parameter, {@code beta}. + */ + public double getScale() { + return scale; + } + + /** {@inheritDoc} */ + @Override + public double density(double x) { + if (x < 0) { + return 0; + } + + final double xscale = x / scale; + final double xscalepow = Math.pow(xscale, shape - 1); + + /* + * Math.pow(x / scale, shape) = + * Math.pow(xscale, shape) = + * Math.pow(xscale, shape - 1) * xscale + */ + final double xscalepowshape = xscalepow * xscale; + + return (shape / scale) * xscalepow * Math.exp(-xscalepowshape); + } + + /** {@inheritDoc} */ + @Override + public double logDensity(double x) { + if (x < 0) { + return Double.NEGATIVE_INFINITY; + } + + final double xscale = x / scale; + final double logxscalepow = Math.log(xscale) * (shape - 1); + + /* + * Math.pow(x / scale, shape) = + * Math.pow(xscale, shape) = + * Math.pow(xscale, shape - 1) * xscale + */ + final double xscalepowshape = Math.exp(logxscalepow) * xscale; + + return Math.log(shape / scale) + logxscalepow - xscalepowshape; + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(double x) { + double ret; + if (x <= 0.0) { + ret = 0.0; + } else { + ret = 1.0 - Math.exp(-Math.pow(x / scale, shape)); + } + return ret; + } + + /** + * {@inheritDoc} + * + * Returns {@code 0} when {@code p == 0} and + * {@code Double.POSITIVE_INFINITY} when {@code p == 1}. + */ + @Override + public double inverseCumulativeProbability(double p) { + double ret; + if (p < 0 || + p > 1) { + throw new DistributionException(DistributionException.OUT_OF_RANGE, p, 0, 1); + } else if (p == 0) { + ret = 0.0; + } else if (p == 1) { + ret = Double.POSITIVE_INFINITY; + } else { + ret = scale * Math.pow(-Math.log1p(-p), 1.0 / shape); + } + return ret; + } + + /** + * {@inheritDoc} + * + * The mean is {@code scale * Gamma(1 + (1 / shape))}, where {@code Gamma()} + * is the Gamma-function. + */ + @Override + public double getNumericalMean() { + final double sh = getShape(); + final double sc = getScale(); + + return sc * Math.exp(LogGamma.value(1 + (1 / sh))); + } + + /** + * {@inheritDoc} + * + * The variance is {@code scale^2 * Gamma(1 + (2 / shape)) - mean^2} + * where {@code Gamma()} is the Gamma-function. + */ + @Override + public double getNumericalVariance() { + final double sh = getShape(); + final double sc = getScale(); + final double mn = getNumericalMean(); + + return (sc * sc) * Math.exp(LogGamma.value(1 + (2 / sh))) - + (mn * mn); + } + + /** + * {@inheritDoc} + * + * The lower bound of the support is always 0 no matter the parameters. + * + * @return lower bound of the support (always 0) + */ + @Override + public double getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * The upper bound of the support is always positive infinity + * no matter the parameters. + * + * @return upper bound of the support (always + * {@code Double.POSITIVE_INFINITY}) + */ + @Override + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** + * {@inheritDoc} + * + * The support of this distribution is connected. + * + * @return {@code true} + */ + @Override + public boolean isSupportConnected() { + return true; + } +} + http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/05626d01/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ZipfDistribution.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ZipfDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ZipfDistribution.java new file mode 100644 index 0000000..f0d54a1 --- /dev/null +++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/ZipfDistribution.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.statistics.distribution; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.sampling.distribution.DiscreteSampler; +import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler; + +/** + * Implementation of the <a href="https://en.wikipedia.org/wiki/Zipf's_law">Zipf distribution</a>. + * <p> + * <strong>Parameters:</strong> + * For a random variable {@code X} whose values are distributed according to this + * distribution, the probability mass function is given by + * <pre> + * P(X = k) = H(N,s) * 1 / k^s for {@code k = 1,2,...,N}. + * </pre> + * {@code H(N,s)} is the normalizing constant + * which corresponds to the generalized harmonic number of order N of s. + * <ul> + * <li>{@code N} is the number of elements</li> + * <li>{@code s} is the exponent</li> + * </ul> + */ +public class ZipfDistribution extends AbstractDiscreteDistribution { + /** Number of elements. */ + private final int numberOfElements; + /** Exponent parameter of the distribution. */ + private final double exponent; + /** Cached values of the nth generalized harmonic. */ + private final double nthHarmonic; + + /** + * Creates a distribution. + * + * @param numberOfElements Number of elements. + * @param exponent Exponent. + * @exception IllegalArgumentException if {@code numberOfElements <= 0} + * or {@code exponent <= 0}. + */ + public ZipfDistribution(int numberOfElements, + double exponent) { + if (numberOfElements <= 0) { + throw new DistributionException(DistributionException.NEGATIVE, + numberOfElements); + } + if (exponent <= 0) { + throw new DistributionException(DistributionException.NEGATIVE, + exponent); + } + + this.numberOfElements = numberOfElements; + this.exponent = exponent; + this.nthHarmonic = generalizedHarmonic(numberOfElements, exponent); + } + + /** + * Get the number of elements (e.g. corpus size) for the distribution. + * + * @return the number of elements + */ + public int getNumberOfElements() { + return numberOfElements; + } + + /** + * Get the exponent characterizing the distribution. + * + * @return the exponent + */ + public double getExponent() { + return exponent; + } + + /** {@inheritDoc} */ + @Override + public double probability(final int x) { + if (x <= 0 || x > numberOfElements) { + return 0; + } + + return (1 / Math.pow(x, exponent)) / nthHarmonic; + } + + /** {@inheritDoc} */ + @Override + public double logProbability(int x) { + if (x <= 0 || x > numberOfElements) { + return Double.NEGATIVE_INFINITY; + } + + return -Math.log(x) * exponent - Math.log(nthHarmonic); + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(final int x) { + if (x <= 0) { + return 0; + } else if (x >= numberOfElements) { + return 1; + } + + return generalizedHarmonic(x, exponent) / nthHarmonic; + } + + /** + * {@inheritDoc} + * + * For number of elements {@code N} and exponent {@code s}, the mean is + * {@code Hs1 / Hs}, where + * <ul> + * <li>{@code Hs1 = generalizedHarmonic(N, s - 1)},</li> + * <li>{@code Hs = generalizedHarmonic(N, s)}.</li> + * </ul> + */ + @Override + public double getNumericalMean() { + final int N = getNumberOfElements(); + final double s = getExponent(); + + final double Hs1 = generalizedHarmonic(N, s - 1); + final double Hs = nthHarmonic; + + return Hs1 / Hs; + } + + /** + * {@inheritDoc} + * + * For number of elements {@code N} and exponent {@code s}, the mean is + * {@code (Hs2 / Hs) - (Hs1^2 / Hs^2)}, where + * <ul> + * <li>{@code Hs2 = generalizedHarmonic(N, s - 2)},</li> + * <li>{@code Hs1 = generalizedHarmonic(N, s - 1)},</li> + * <li>{@code Hs = generalizedHarmonic(N, s)}.</li> + * </ul> + */ + @Override + public double getNumericalVariance() { + final int N = getNumberOfElements(); + final double s = getExponent(); + + final double Hs2 = generalizedHarmonic(N, s - 2); + final double Hs1 = generalizedHarmonic(N, s - 1); + final double Hs = nthHarmonic; + + return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs)); + } + + /** + * Calculates the Nth generalized harmonic number. See + * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic + * Series</a>. + * + * @param n Term in the series to calculate (must be larger than 1) + * @param m Exponent (special case {@code m = 1} is the harmonic series). + * @return the n<sup>th</sup> generalized harmonic number. + */ + private double generalizedHarmonic(final int n, final double m) { + double value = 0; + for (int k = n; k > 0; --k) { + value += 1 / Math.pow(k, m); + } + return value; + } + + /** + * {@inheritDoc} + * + * The lower bound of the support is always 1 no matter the parameters. + * + * @return lower bound of the support (always 1) + */ + @Override + public int getSupportLowerBound() { + return 1; + } + + /** + * {@inheritDoc} + * + * The upper bound of the support is the number of elements. + * + * @return upper bound of the support + */ + @Override + public int getSupportUpperBound() { + return getNumberOfElements(); + } + + /** + * {@inheritDoc} + * + * The support of this distribution is connected. + * + * @return {@code true} + */ + @Override + public boolean isSupportConnected() { + return true; + } + + /**{@inheritDoc} */ + @Override + public DiscreteDistribution.Sampler createSampler(final UniformRandomProvider rng) { + return new DiscreteDistribution.Sampler() { + /** + * Zipf distribution sampler. + */ + private final DiscreteSampler sampler = + new RejectionInversionZipfSampler(rng, numberOfElements, exponent); + + /**{@inheritDoc} */ + @Override + public int sample() { + return sampler.sample(); + } + }; + } +} http://git-wip-us.apache.org/repos/asf/commons-statistics/blob/05626d01/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/package-info.java ---------------------------------------------------------------------- diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/package-info.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/package-info.java new file mode 100644 index 0000000..98315c6 --- /dev/null +++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Implementations of common discrete and continuous distributions. + */ +package org.apache.commons.statistics.distribution;