This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-numbers.git
The following commit(s) were added to refs/heads/master by this push: new 1c457b7 NUMBERS-142: Update LinearCombination to use the dot2s algorithm 1c457b7 is described below commit 1c457b74cd54ee0b09b274d57543e82633f47bb2 Author: aherbert <aherb...@apache.org> AuthorDate: Wed Apr 21 17:51:19 2021 +0100 NUMBERS-142: Update LinearCombination to use the dot2s algorithm - Avoids construction of an intermediate array for the dot product of array input - Perform splitting using Dekker's multiplication algorithm to retain all bits of precision - Handle overflow during the split for large numbers Splitting now correctly handles sub-normal numbers with no information in the upper 26-bits as input. The high part will be sub-normal and the low part will be zero. The previous split created a zero high part and the input as the low part. --- .../commons/numbers/arrays/ExtendedPrecision.java | 286 ++++++++++++++++++ .../commons/numbers/arrays/LinearCombination.java | 330 +++++++-------------- .../numbers/arrays/ExtendedPrecisionTest.java | 107 +++++++ .../numbers/arrays/LinearCombinationTest.java | 236 +++++++++++---- src/changes/changes.xml | 5 + 5 files changed, 672 insertions(+), 292 deletions(-) diff --git a/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/ExtendedPrecision.java b/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/ExtendedPrecision.java new file mode 100644 index 0000000..da7a72d --- /dev/null +++ b/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/ExtendedPrecision.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.numbers.arrays; + +/** + * Computes extended precision floating-point operations. + * + * <p>It is based on the 1971 paper + * <a href="https://doi.org/10.1007/BF01397083"> + * Dekker (1971) A floating-point technique for extending the available precision</a>. + */ +final class ExtendedPrecision { + /* + * Caveat: + * + * The code below uses many additions/subtractions that may + * appear redundant. However, they should NOT be simplified, as they + * do use IEEE754 floating point arithmetic rounding properties. + * + * Algorithms are based on computing the product or sum of two values x and y in + * extended precision. The standard result is stored using a double (high part z) and + * the round-off error (or low part zz) is stored in a second double, e.g: + * x * y = (z, zz); z + zz = x * y + * x + y = (z, zz); z + zz = x + y + * + * To sum multiple (z, zz) results ideally the parts are sorted in order of + * non-decreasing magnitude and summed. This is exact if each number's most significant + * bit is below the least significant bit of the next (i.e. does not + * overlap). Creating non-overlapping parts requires a rebalancing + * of adjacent pairs using a summation z + zz = (z1, zz1) iteratively through the parts + * (see Shewchuk (1997) Grow-Expansion and Expansion-Sum [1]). + * + * [1] Shewchuk (1997): Arbitrary Precision Floating-Point Arithmetic + * http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps + */ + + /** + * The multiplier used to split the double value into high and low parts. From + * Dekker (1971): "The constant should be chosen equal to 2^(p - p/2) + 1, + * where p is the number of binary digits in the mantissa". Here p is 53 + * and the multiplier is {@code 2^27 + 1}. + */ + private static final double MULTIPLIER = 1.0 + 0x1.0p27; + + /** + * The upper limit above which a number may overflow during the split into a high part. + * Assuming the multiplier is above 2^27 and the maximum exponent is 1023 then a safe + * limit is a value with an exponent of (1023 - 27) = 2^996. + * 996 is the value obtained from {@code Math.getExponent(Double.MAX_VALUE / MULTIPLIER)}. + */ + private static final double SAFE_UPPER = 0x1.0p996; + + /** The scale to use when down-scaling during a split into a high part. + * This must be smaller than the inverse of the multiplier and a power of 2 for exact scaling. */ + private static final double DOWN_SCALE = 0x1.0p-30; + + /** The scale to use when re-scaling during a split into a high part. + * This is the inverse of {@link #DOWN_SCALE}. */ + private static final double UP_SCALE = 0x1.0p30; + + /** The mask to extract the raw 11-bit exponent. + * The value must be shifted 52-bits to remove the mantissa bits. */ + private static final int EXP_MASK = 0x7ff; + + /** The value 2046 converted for use if using {@link Integer#compareUnsigned(int, int)}. + * This requires adding {@link Integer#MIN_VALUE} to 2046. */ + private static final int CMP_UNSIGNED_2046 = Integer.MIN_VALUE + 2046; + + /** The value -1 converted for use if using {@link Integer#compareUnsigned(int, int)}. + * This requires adding {@link Integer#MIN_VALUE} to -1. */ + private static final int CMP_UNSIGNED_MINUS_1 = Integer.MIN_VALUE - 1; + + /** Private constructor. */ + private ExtendedPrecision() { + // intentionally empty. + } + + /** + * Compute the low part of the double length number {@code (z,zz)} for the exact + * product of {@code x} and {@code y}. This is equivalent to computing a {@code double} + * containing the magnitude of the rounding error when converting the exact 106-bit + * significand of the multiplication result to a 53-bit significand. + * + * <p>The method is written to be functionally similar to using a fused multiply add (FMA) + * operation to compute the low part, for example JDK 9's Math.fma function (note the sign + * change in the input argument for the product): + * <pre> + * double x = ...; + * double y = ...; + * double xy = x * y; + * double low1 = Math.fma(x, y, -xy); + * double low2 = productLow(x, y, xy); + * </pre> + * + * <p>Special cases: + * + * <ul> + * <li>If {@code x * y} is sub-normal or zero then the result is 0.0. + * <li>If {@code x * y} is infinite or NaN then the result is NaN. + * </ul> + * + * @param x First factor. + * @param y Second factor. + * @param xy Product of the factors (x * y). + * @return the low part of the product double length number + * @see #highPartUnscaled(double) + */ + static double productLow(double x, double y, double xy) { + // Verify the input. This must be NaN safe. + //assert Double.compare(x * y, xy) == 0 + + // If the number is sub-normal, inf or nan there is no round-off. + if (isNotNormal(xy)) { + // Returns 0.0 for sub-normal xy, otherwise NaN for inf/nan: + return xy - xy; + } + + // The result xy is finite and normal. + // Use Dekker's mul12 algorithm that splits the values into high and low parts. + // Dekker's split using multiplication will overflow if the value is within 2^27 + // of double max value. It can also produce 26-bit approximations that are larger + // than the input numbers for the high part causing overflow in hx * hy when + // x * y does not overflow. So we must scale down big numbers. + // We only have to scale the largest number as we know the product does not overflow + // (if one is too big then the other cannot be). + // We also scale if the product is close to overflow to avoid intermediate overflow. + // This could be done at a higher limit (e.g. Math.abs(xy) > Double.MAX_VALUE / 4) + // but is included here to have a single low probability branch condition. + + // Add the absolute inputs for a single comparison. The sum will not be more than + // 3-fold higher than any component. + final double a = Math.abs(x); + final double b = Math.abs(y); + if (a + b + Math.abs(xy) >= SAFE_UPPER) { + // Only required to scale the largest number as x*y does not overflow. + if (a > b) { + return productLowUnscaled(x * DOWN_SCALE, y, xy * DOWN_SCALE) * UP_SCALE; + } + return productLowUnscaled(x, y * DOWN_SCALE, xy * DOWN_SCALE) * UP_SCALE; + } + + // No scaling required + return productLowUnscaled(x, y, xy); + } + + /** + * Checks if the number is not normal. This is functionally equivalent to: + * <pre> + * final double abs = Math.abs(a); + * return (abs <= Double.MIN_NORMAL || !(abs <= Double.MAX_VALUE)); + * </pre> + * + * @param a The value. + * @return true if the value is not normal + */ + static boolean isNotNormal(double a) { + // Sub-normal numbers have a biased exponent of 0. + // Inf/NaN numbers have a biased exponent of 2047. + // Catch both cases by extracting the raw exponent, subtracting 1 + // and compare unsigned (so 0 underflows to a unsigned large value). + final int baisedExponent = ((int) (Double.doubleToRawLongBits(a) >>> 52)) & EXP_MASK; + // Pre-compute the additions used by Integer.compareUnsigned + return baisedExponent + CMP_UNSIGNED_MINUS_1 >= CMP_UNSIGNED_2046; + } + + /** + * Compute the low part of the double length number {@code (z,zz)} for the exact + * product of {@code x} and {@code y} using Dekker's mult12 algorithm. The standard + * precision product {@code x*y} must be provided. The numbers {@code x} and {@code y} + * are split into high and low parts using Dekker's algorithm. + * + * <p>Warning: This method does not perform scaling in Dekker's split and large + * finite numbers can create NaN results. + * + * @param x First factor. + * @param y Second factor. + * @param xy Product of the factors (x * y). + * @return the low part of the product double length number + * @see #highPartUnscaled(double) + * @see #productLow(double, double, double, double, double) + */ + private static double productLowUnscaled(double x, double y, double xy) { + // Split the numbers using Dekker's algorithm without scaling + final double hx = highPartUnscaled(x); + final double lx = x - hx; + + final double hy = highPartUnscaled(y); + final double ly = y - hy; + + return productLow(hx, lx, hy, ly, xy); + } + + /** + * Compute the low part of the double length number {@code (z,zz)} for the exact + * product of {@code x} and {@code y} using Dekker's mult12 algorithm. The standard + * precision product {@code x*y} must be provided. The numbers {@code x} and {@code y} + * should already be split into low and high parts. + * + * <p>Note: This uses the high part of the result {@code (z,zz)} as {@code x * y} and not + * {@code hx * hy + hx * ty + tx * hy} as specified in Dekker's original paper. + * See Shewchuk (1997) for working examples. + * + * @param hx High part of first factor. + * @param lx Low part of first factor. + * @param hy High part of second factor. + * @param ly Low part of second factor. + * @param xy Product of the factors. + * @return <code>lx * ly - (((xy - hx * hy) - lx * hy) - hx * ly)</code> + * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps"> + * Shewchuk (1997) Theorum 18</a> + */ + private static double productLow(double hx, double lx, double hy, double ly, double xy) { + // Compute the multiply low part: + // err1 = xy - hx * hy + // err2 = err1 - lx * hy + // err3 = err2 - hx * ly + // low = lx * ly - err3 + return lx * ly - (((xy - hx * hy) - lx * hy) - hx * ly); + } + + /** + * Implement Dekker's method to split a value into two parts. Multiplying by (2^s + 1) creates + * a big value from which to derive the two split parts. + * <pre> + * c = (2^s + 1) * a + * a_big = c - a + * a_hi = c - a_big + * a_lo = a - a_hi + * a = a_hi + a_lo + * </pre> + * + * <p>The multiplicand allows a p-bit value to be split into + * (p-s)-bit value {@code a_hi} and a non-overlapping (s-1)-bit value {@code a_lo}. + * Combined they have (p-1) bits of significand but the sign bit of {@code a_lo} + * contains a bit of information. The constant is chosen so that s is ceil(p/2) where + * the precision p for a double is 53-bits (1-bit of the mantissa is assumed to be + * 1 for a non sub-normal number) and s is 27. + * + * <p>This conversion does not use scaling and the result of overflow is NaN. Overflow + * may occur when the exponent of the input value is above 996. + * + * <p>Splitting a NaN or infinite value will return NaN. + * + * @param value Value. + * @return the high part of the value. + * @see Math#getExponent(double) + */ + static double highPartUnscaled(double value) { + final double c = MULTIPLIER * value; + return c - (c - value); + } + + /** + * Compute the round-off from the sum of two numbers {@code a} and {@code b} using + * Knuth's two-sum algorithm. The values are not required to be ordered by magnitude. + * The standard precision sum must be provided. + * + * @param a First part of sum. + * @param b Second part of sum. + * @param sum Sum of the parts (a + b). + * @return <code>(b - (sum - (sum - b))) + (a - (sum - b))</code> + * @see <a href="http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps"> + * Shewchuk (1997) Theorum 7</a> + */ + static double twoSumLow(double a, double b, double sum) { + final double bVirtual = sum - a; + // sum - bVirtual == aVirtual. + // a - aVirtual == a round-off + // b - bVirtual == b round-off + return (a - (sum - bVirtual)) + (b - bVirtual); + } +} diff --git a/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/LinearCombination.java b/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/LinearCombination.java index 052857c..fa9ddfb 100644 --- a/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/LinearCombination.java +++ b/commons-numbers-arrays/src/main/java/org/apache/commons/numbers/arrays/LinearCombination.java @@ -23,8 +23,8 @@ package org.apache.commons.numbers.arrays; * It does so by using specific multiplication and addition algorithms to * preserve accuracy and reduce cancellation effects. * - * It is based on the 2005 paper - * <a href="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.2.1547"> + * <p>It is based on the 2005 paper + * <a href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.2.1547"> * Accurate Sum and Dot Product</a> by Takeshi Ogita, Siegfried M. Rump, * and Shin'ichi Oishi published in <em>SIAM J. Sci. Comput</em>. */ @@ -32,15 +32,30 @@ public final class LinearCombination { /* * Caveat: * - * The code below is split in many additions/subtractions that may + * The code below uses many additions/subtractions that may * appear redundant. However, they should NOT be simplified, as they * do use IEEE754 floating point arithmetic rounding properties. - * The variables naming conventions are that xyzHigh contains the most significant - * bits of xyz and xyzLow contains its least significant bits. So theoretically - * xyz is the sum xyzHigh + xyzLow, but in many cases below, this sum cannot - * be represented in only one double precision number so we preserve two numbers - * to hold it as long as we can, combining the high and low order bits together - * only at the end, after cancellation may have occurred on high order bits + * + * Algorithms are based on computing the product or sum of two values x and y in + * extended precision. The standard result is stored using a double (high part z) and + * the round-off error (or low part zz) is stored in a second double, e.g: + * x * y = (z, zz); z + zz = x * y + * x + y = (z, zz); z + zz = x + y + * + * To sum multiple (z, zz) results ideally the parts are sorted in order of + * non-decreasing magnitude and summed. This is exact if each number's most significant + * bit is below the least significant bit of the next (i.e. does not + * overlap). Creating non-overlapping parts requires a rebalancing + * of adjacent pairs using a summation z + zz = (z1, zz1) iteratively through the parts + * (see Shewchuk (1997) Grow-Expansion and Expansion-Sum [1]). + * + * In this class the sum of the low parts in computed separately from the sum of the + * high parts for an approximate 2-fold increase in precision in the event of cancellation + * (sum positives and negatives to a result of much smaller magnitude than the parts). + * Uses the dot2s algorithm of Ogita to avoid allocation of an array to store intermediates. + * + * [1] Shewchuk (1997): Arbitrary Precision Floating-Point Arithmetic + * http://www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps */ /** Private constructor. */ @@ -60,57 +75,26 @@ public final class LinearCombination { throw new IllegalArgumentException("Dimension mismatch: " + a.length + " != " + b.length); } + // Implement dot2s (Algorithm 5.4) from Ogita et al (2005). final int len = a.length; - if (len == 1) { - // Revert to scalar multiplication. - return a[0] * b[0]; - } - - final double[] prodHigh = new double[len]; - double prodLowSum = 0; - - for (int i = 0; i < len; i++) { - final double ai = a[i]; - final double aHigh = highPart(ai); - final double aLow = ai - aHigh; - - final double bi = b[i]; - final double bHigh = highPart(bi); - final double bLow = bi - bHigh; - prodHigh[i] = ai * bi; - final double prodLow = prodLow(aLow, bLow, prodHigh[i], aHigh, bHigh); - prodLowSum += prodLow; - } + // p is the standard scalar product sum. + // s is the sum of round-off parts. + double p = a[0] * b[0]; + double s = ExtendedPrecision.productLow(a[0], b[0], p); + // Remaining split products added to the current sum and round-off sum. + for (int i = 1; i < len; i++) { + final double h = a[i] * b[i]; + final double r = ExtendedPrecision.productLow(a[i], b[i], h); - final double prodHighCur = prodHigh[0]; - double prodHighNext = prodHigh[1]; - double sHighPrev = prodHighCur + prodHighNext; - double sPrime = sHighPrev - prodHighNext; - double sLowSum = (prodHighNext - (sHighPrev - sPrime)) + (prodHighCur - sPrime); - - final int lenMinusOne = len - 1; - for (int i = 1; i < lenMinusOne; i++) { - prodHighNext = prodHigh[i + 1]; - final double sHighCur = sHighPrev + prodHighNext; - sPrime = sHighCur - prodHighNext; - sLowSum += (prodHighNext - (sHighCur - sPrime)) + (sHighPrev - sPrime); - sHighPrev = sHighCur; + final double x = p + h; + // s_i = s_(i-1) + (q_i + r_i) + s += ExtendedPrecision.twoSumLow(p, h, x) + r; + p = x; } - double result = sHighPrev + (prodLowSum + sLowSum); - - if (Double.isNaN(result)) { - // either we have split infinite numbers or some coefficients were NaNs, - // just rely on the naive implementation and let IEEE754 handle this - result = 0; - for (int i = 0; i < len; ++i) { - result += a[i] * b[i]; - } - } - - return result; + return getSum(p, p + s); } /** @@ -126,42 +110,17 @@ public final class LinearCombination { */ public static double value(double a1, double b1, double a2, double b2) { - // split a1 and b1 as one 26 bits number and one 27 bits number - final double a1High = highPart(a1); - final double a1Low = a1 - a1High; - final double b1High = highPart(b1); - final double b1Low = b1 - b1High; - - // accurate multiplication a1 * b1 - final double prod1High = a1 * b1; - final double prod1Low = prodLow(a1Low, b1Low, prod1High, a1High, b1High); - - // split a2 and b2 as one 26 bits number and one 27 bits number - final double a2High = highPart(a2); - final double a2Low = a2 - a2High; - final double b2High = highPart(b2); - final double b2Low = b2 - b2High; - - // accurate multiplication a2 * b2 - final double prod2High = a2 * b2; - final double prod2Low = prodLow(a2Low, b2Low, prod2High, a2High, b2High); - - // accurate addition a1 * b1 + a2 * b2 - final double s12High = prod1High + prod2High; - final double s12Prime = s12High - prod2High; - final double s12Low = (prod2High - (s12High - s12Prime)) + (prod1High - s12Prime); - - // final rounding, s12 may have suffered many cancellations, we try - // to recover some bits from the extra words we have saved up to now - double result = s12High + (prod1Low + prod2Low + s12Low); - - if (Double.isNaN(result)) { - // either we have split infinite numbers or some coefficients were NaNs, - // just rely on the naive implementation and let IEEE754 handle this - result = a1 * b1 + a2 * b2; - } - - return result; + // p/pn are the standard scalar product old/new sum. + // s is the sum of round-off parts. + final double p = a1 * b1; + double s = ExtendedPrecision.productLow(a1, b1, p); + final double h = a2 * b2; + final double r = ExtendedPrecision.productLow(a2, b2, h); + final double pn = p + h; + s += ExtendedPrecision.twoSumLow(p, h, pn) + r; + + // Final summation + return getSum(pn, pn + s); } /** @@ -180,57 +139,22 @@ public final class LinearCombination { public static double value(double a1, double b1, double a2, double b2, double a3, double b3) { - // split a1 and b1 as one 26 bits number and one 27 bits number - final double a1High = highPart(a1); - final double a1Low = a1 - a1High; - final double b1High = highPart(b1); - final double b1Low = b1 - b1High; - - // accurate multiplication a1 * b1 - final double prod1High = a1 * b1; - final double prod1Low = prodLow(a1Low, b1Low, prod1High, a1High, b1High); - - // split a2 and b2 as one 26 bits number and one 27 bits number - final double a2High = highPart(a2); - final double a2Low = a2 - a2High; - final double b2High = highPart(b2); - final double b2Low = b2 - b2High; - - // accurate multiplication a2 * b2 - final double prod2High = a2 * b2; - final double prod2Low = prodLow(a2Low, b2Low, prod2High, a2High, b2High); - - // split a3 and b3 as one 26 bits number and one 27 bits number - final double a3High = highPart(a3); - final double a3Low = a3 - a3High; - final double b3High = highPart(b3); - final double b3Low = b3 - b3High; - - // accurate multiplication a3 * b3 - final double prod3High = a3 * b3; - final double prod3Low = prodLow(a3Low, b3Low, prod3High, a3High, b3High); - - // accurate addition a1 * b1 + a2 * b2 - final double s12High = prod1High + prod2High; - final double s12Prime = s12High - prod2High; - final double s12Low = (prod2High - (s12High - s12Prime)) + (prod1High - s12Prime); - - // accurate addition a1 * b1 + a2 * b2 + a3 * b3 - final double s123High = s12High + prod3High; - final double s123Prime = s123High - prod3High; - final double s123Low = (prod3High - (s123High - s123Prime)) + (s12High - s123Prime); - - // final rounding, s123 may have suffered many cancellations, we try - // to recover some bits from the extra words we have saved up to now - double result = s123High + (prod1Low + prod2Low + prod3Low + s12Low + s123Low); - - if (Double.isNaN(result)) { - // either we have split infinite numbers or some coefficients were NaNs, - // just rely on the naive implementation and let IEEE754 handle this - result = a1 * b1 + a2 * b2 + a3 * b3; - } - - return result; + // p/q are the standard scalar product old/new sum (alternating). + // s is the sum of round-off parts. + // pn is the final scalar product sum. + final double p = a1 * b1; + double s = ExtendedPrecision.productLow(a1, b1, p); + double h = a2 * b2; + double r = ExtendedPrecision.productLow(a2, b2, h); + final double q = p + h; + s += r + ExtendedPrecision.twoSumLow(p, h, q); + h = a3 * b3; + r = ExtendedPrecision.productLow(a3, b3, h); + final double pn = q + h; + s += r + ExtendedPrecision.twoSumLow(q, h, pn); + + // Final summation + return getSum(pn, pn + s); } /** @@ -252,95 +176,47 @@ public final class LinearCombination { double a2, double b2, double a3, double b3, double a4, double b4) { - // split a1 and b1 as one 26 bits number and one 27 bits number - final double a1High = highPart(a1); - final double a1Low = a1 - a1High; - final double b1High = highPart(b1); - final double b1Low = b1 - b1High; - - // accurate multiplication a1 * b1 - final double prod1High = a1 * b1; - final double prod1Low = prodLow(a1Low, b1Low, prod1High, a1High, b1High); - - // split a2 and b2 as one 26 bits number and one 27 bits number - final double a2High = highPart(a2); - final double a2Low = a2 - a2High; - final double b2High = highPart(b2); - final double b2Low = b2 - b2High; - - // accurate multiplication a2 * b2 - final double prod2High = a2 * b2; - final double prod2Low = prodLow(a2Low, b2Low, prod2High, a2High, b2High); - - // split a3 and b3 as one 26 bits number and one 27 bits number - final double a3High = highPart(a3); - final double a3Low = a3 - a3High; - final double b3High = highPart(b3); - final double b3Low = b3 - b3High; - - // accurate multiplication a3 * b3 - final double prod3High = a3 * b3; - final double prod3Low = prodLow(a3Low, b3Low, prod3High, a3High, b3High); - - // split a4 and b4 as one 26 bits number and one 27 bits number - final double a4High = highPart(a4); - final double a4Low = a4 - a4High; - final double b4High = highPart(b4); - final double b4Low = b4 - b4High; - - // accurate multiplication a4 * b4 - final double prod4High = a4 * b4; - final double prod4Low = prodLow(a4Low, b4Low, prod4High, a4High, b4High); - - // accurate addition a1 * b1 + a2 * b2 - final double s12High = prod1High + prod2High; - final double s12Prime = s12High - prod2High; - final double s12Low = (prod2High - (s12High - s12Prime)) + (prod1High - s12Prime); - - // accurate addition a1 * b1 + a2 * b2 + a3 * b3 - final double s123High = s12High + prod3High; - final double s123Prime = s123High - prod3High; - final double s123Low = (prod3High - (s123High - s123Prime)) + (s12High - s123Prime); - - // accurate addition a1 * b1 + a2 * b2 + a3 * b3 + a4 * b4 - final double s1234High = s123High + prod4High; - final double s1234Prime = s1234High - prod4High; - final double s1234Low = (prod4High - (s1234High - s1234Prime)) + (s123High - s1234Prime); - - // final rounding, s1234 may have suffered many cancellations, we try - // to recover some bits from the extra words we have saved up to now - double result = s1234High + (prod1Low + prod2Low + prod3Low + prod4Low + s12Low + s123Low + s1234Low); - - if (Double.isNaN(result)) { - // either we have split infinite numbers or some coefficients were NaNs, - // just rely on the naive implementation and let IEEE754 handle this - result = a1 * b1 + a2 * b2 + a3 * b3 + a4 * b4; - } - - return result; + // p/q are the standard scalar product old/new sum (alternating). + // s is the sum of round-off parts. + // pn is the final scalar product sum. + double p = a1 * b1; + double s = ExtendedPrecision.productLow(a1, b1, p); + double h = a2 * b2; + double r = ExtendedPrecision.productLow(a2, b2, h); + final double q = p + h; + s += ExtendedPrecision.twoSumLow(p, h, q) + r; + h = a3 * b3; + r = ExtendedPrecision.productLow(a3, b3, h); + p = q + h; + s += ExtendedPrecision.twoSumLow(q, h, p) + r; + h = a4 * b4; + r = ExtendedPrecision.productLow(a4, b4, h); + final double pn = p + h; + s += ExtendedPrecision.twoSumLow(p, h, pn) + r; + + // Final summation + return getSum(pn, pn + s); } /** - * @param value Value. - * @return the high part of the value. - */ - private static double highPart(double value) { - return Double.longBitsToDouble(Double.doubleToRawLongBits(value) & ((-1L) << 27)); - } - - /** - * @param aLow Low part of first factor. - * @param bLow Low part of second factor. - * @param prodHigh Product of the factors. - * @param aHigh High part of first factor. - * @param bHigh High part of second factor. - * @return <code>aLow * bLow - (((prodHigh - aHigh * bHigh) - aLow * bHigh) - aHigh * bLow)</code> + * Gets the final sum. This checks the high precision sum is finite, otherwise + * returns the standard precision sum for the IEEE754 result. + * + * <p>The high precision sum may be non-finite due to input infinite + * or NaN numbers or overflow in the summation. In all cases returning the + * standard sum ensures the IEEE754 result. + * + * @param sum Standard sum. + * @param hpSum High precision sum. + * @return the sum */ - private static double prodLow(double aLow, - double bLow, - double prodHigh, - double aHigh, - double bHigh) { - return aLow * bLow - (((prodHigh - aHigh * bHigh) - aLow * bHigh) - aHigh * bLow); + private static double getSum(double sum, double hpSum) { + if (!Double.isFinite(hpSum)) { + // Either we have split infinite numbers, some coefficients were NaNs, + // or the sum overflowed. + // Return the naive implementation for the IEEE754 result. + return sum; + } + return hpSum; } } diff --git a/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/ExtendedPrecisionTest.java b/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/ExtendedPrecisionTest.java new file mode 100644 index 0000000..262d6c6 --- /dev/null +++ b/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/ExtendedPrecisionTest.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.numbers.arrays; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Test cases for the {@link ExtendedPrecision} class. + */ +class ExtendedPrecisionTest { + @Test + void testSplitAssumptions() { + // The multiplier used to split the double value into high and low parts. + final double scale = (1 << 27) + 1; + // The upper limit above which a number may overflow during the split into a high part. + final double limit = 0x1.0p996; + Assertions.assertTrue(Double.isFinite(limit * scale)); + Assertions.assertTrue(Double.isFinite(-limit * scale)); + // Cannot make the limit the next power up + Assertions.assertEquals(Double.POSITIVE_INFINITY, limit * 2 * scale); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, -limit * 2 * scale); + // Check the level for the safe upper limit of the exponent of the sum of the absolute + // components of the product + Assertions.assertTrue(Math.getExponent(2 * Math.sqrt(Double.MAX_VALUE)) - 2 > 508); + } + + @Test + void testHighPartUnscaled() { + Assertions.assertEquals(Double.NaN, ExtendedPrecision.highPartUnscaled(Double.POSITIVE_INFINITY)); + Assertions.assertEquals(Double.NaN, ExtendedPrecision.highPartUnscaled(Double.NEGATIVE_INFINITY)); + Assertions.assertEquals(Double.NaN, ExtendedPrecision.highPartUnscaled(Double.NaN)); + // Large finite numbers will overflow during the split + Assertions.assertEquals(Double.NaN, ExtendedPrecision.highPartUnscaled(Double.MAX_VALUE)); + Assertions.assertEquals(Double.NaN, ExtendedPrecision.highPartUnscaled(-Double.MAX_VALUE)); + } + + /** + * Test {@link ExtendedPrecision#productLow(double, double, double)} computes the same + * result as JDK 9 Math.fma(x, y, -x * y) for edge cases. + */ + @Test + void testProductLow() { + assertProductLow(0.0, 1.0, Math.nextDown(Double.MIN_NORMAL)); + assertProductLow(0.0, -1.0, Math.nextDown(Double.MIN_NORMAL)); + assertProductLow(Double.NaN, 1.0, Double.POSITIVE_INFINITY); + assertProductLow(Double.NaN, 1.0, Double.NEGATIVE_INFINITY); + assertProductLow(Double.NaN, 1.0, Double.NaN); + assertProductLow(0.0, 1.0, Double.MAX_VALUE); + assertProductLow(Double.NaN, 2.0, Double.MAX_VALUE); + } + + private static void assertProductLow(double expected, double x, double y) { + // Requires a delta of 0.0 to assert -0.0 == 0.0 + Assertions.assertEquals(expected, ExtendedPrecision.productLow(x, y, x * y), 0.0); + } + + @Test + void testIsNotNormal() { + for (double a : new double[] {Double.MAX_VALUE, 1.0, Double.MIN_NORMAL}) { + Assertions.assertFalse(ExtendedPrecision.isNotNormal(a)); + Assertions.assertFalse(ExtendedPrecision.isNotNormal(-a)); + } + for (double a : new double[] {Double.POSITIVE_INFINITY, 0.0, + Math.nextDown(Double.MIN_NORMAL), Double.NaN}) { + Assertions.assertTrue(ExtendedPrecision.isNotNormal(a)); + Assertions.assertTrue(ExtendedPrecision.isNotNormal(-a)); + } + } + + /** + * This demonstrates splitting a sub normal number with no information in the upper 26 bits + * of the mantissa. + */ + @Test + void testSubNormalSplit() { + final double a = Double.longBitsToDouble(1L << 25); + + // A split using masking of the mantissa bits computes the high part incorrectly + final double hi1 = Double.longBitsToDouble(Double.doubleToRawLongBits(a) & ((-1L) << 27)); + final double lo1 = a - hi1; + Assertions.assertEquals(0, hi1); + Assertions.assertEquals(a, lo1); + Assertions.assertFalse(Math.abs(hi1) > Math.abs(lo1)); + + // Dekker's split + final double hi2 = ExtendedPrecision.highPartUnscaled(a); + final double lo2 = a - hi2; + Assertions.assertEquals(a, hi2); + Assertions.assertEquals(0, lo2); + Assertions.assertTrue(Math.abs(hi2) > Math.abs(lo2)); + } +} diff --git a/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/LinearCombinationTest.java b/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/LinearCombinationTest.java index 6711b0c..855c46c 100644 --- a/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/LinearCombinationTest.java +++ b/commons-numbers-arrays/src/test/java/org/apache/commons/numbers/arrays/LinearCombinationTest.java @@ -27,6 +27,12 @@ import org.apache.commons.numbers.fraction.BigFraction; * Test cases for the {@link LinearCombination} class. */ class LinearCombinationTest { + @Test + void testDimensionMismatch() { + Assertions.assertThrows(IllegalArgumentException.class, + () -> LinearCombination.value(new double[1], new double[2])); + } + // MATH-1005 @Test void testSingleElementArray() { @@ -147,7 +153,7 @@ class LinearCombinationTest { } @Test - void testInfinite() { + void testNonFinite() { final double[][] a = new double[][] { {1, 2, 3, 4}, {1, Double.POSITIVE_INFINITY, 3, 4}, @@ -156,7 +162,10 @@ class LinearCombinationTest { {1, 2, 3, 4}, {1, 2, 3, 4}, {1, 2, 3, 4}, - {1, 2, 3, 4} + {1, 2, 3, 4}, + {1, Double.MAX_VALUE, 3, 4}, + {1, 2, Double.MAX_VALUE, 4}, + {1, Double.MAX_VALUE / 2, 3, -Double.MAX_VALUE / 4}, }; final double[][] b = new double[][] { {1, -2, 3, 4}, @@ -166,135 +175,232 @@ class LinearCombinationTest { {1, Double.POSITIVE_INFINITY, 3, 4}, {1, -2, Double.POSITIVE_INFINITY, 4}, {1, Double.POSITIVE_INFINITY, 3, Double.NEGATIVE_INFINITY}, - {Double.NaN, -2, 3, 4} + {Double.NaN, -2, 3, 4}, + {1, -2, 3, 4}, + {1, -2, 3, 4}, + {1, -2, 3, 4}, }; Assertions.assertEquals(-3, LinearCombination.value(a[0][0], b[0][0], - a[0][1], b[0][1]), - 1e-10); + a[0][1], b[0][1])); Assertions.assertEquals(6, LinearCombination.value(a[0][0], b[0][0], a[0][1], b[0][1], - a[0][2], b[0][2]), - 1e-10); + a[0][2], b[0][2])); Assertions.assertEquals(22, LinearCombination.value(a[0][0], b[0][0], a[0][1], b[0][1], a[0][2], b[0][2], - a[0][3], b[0][3]), - 1e-10); - Assertions.assertEquals(22, LinearCombination.value(a[0], b[0]), 1e-10); + a[0][3], b[0][3])); + Assertions.assertEquals(22, LinearCombination.value(a[0], b[0])); Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[1][0], b[1][0], - a[1][1], b[1][1]), - 1e-10); + a[1][1], b[1][1])); Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[1][0], b[1][0], a[1][1], b[1][1], - a[1][2], b[1][2]), - 1e-10); + a[1][2], b[1][2])); Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[1][0], b[1][0], a[1][1], b[1][1], a[1][2], b[1][2], - a[1][3], b[1][3]), - 1e-10); - Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[1], b[1]), 1e-10); + a[1][3], b[1][3])); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[1], b[1])); Assertions.assertEquals(-3, LinearCombination.value(a[2][0], b[2][0], - a[2][1], b[2][1]), - 1e-10); + a[2][1], b[2][1])); Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[2][0], b[2][0], a[2][1], b[2][1], - a[2][2], b[2][2]), - 1e-10); + a[2][2], b[2][2])); Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[2][0], b[2][0], a[2][1], b[2][1], a[2][2], b[2][2], - a[2][3], b[2][3]), - 1e-10); - Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[2], b[2]), 1e-10); + a[2][3], b[2][3])); + Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[2], b[2])); Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[3][0], b[3][0], - a[3][1], b[3][1]), - 1e-10); + a[3][1], b[3][1])); Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[3][0], b[3][0], a[3][1], b[3][1], - a[3][2], b[3][2]), - 1e-10); + a[3][2], b[3][2])); Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[3][0], b[3][0], a[3][1], b[3][1], a[3][2], b[3][2], - a[3][3], b[3][3]), - 1e-10); - Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[3], b[3]), 1e-10); + a[3][3], b[3][3])); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[3], b[3])); Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[4][0], b[4][0], - a[4][1], b[4][1]), - 1e-10); + a[4][1], b[4][1])); Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[4][0], b[4][0], a[4][1], b[4][1], - a[4][2], b[4][2]), - 1e-10); + a[4][2], b[4][2])); Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[4][0], b[4][0], a[4][1], b[4][1], a[4][2], b[4][2], - a[4][3], b[4][3]), - 1e-10); - Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[4], b[4]), 1e-10); + a[4][3], b[4][3])); + Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[4], b[4])); Assertions.assertEquals(-3, LinearCombination.value(a[5][0], b[5][0], - a[5][1], b[5][1]), - 1e-10); + a[5][1], b[5][1])); Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[5][0], b[5][0], a[5][1], b[5][1], - a[5][2], b[5][2]), - 1e-10); + a[5][2], b[5][2])); Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[5][0], b[5][0], a[5][1], b[5][1], a[5][2], b[5][2], - a[5][3], b[5][3]), - 1e-10); - Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[5], b[5]), 1e-10); + a[5][3], b[5][3])); + Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[5], b[5])); Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[6][0], b[6][0], - a[6][1], b[6][1]), - 1e-10); + a[6][1], b[6][1])); Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[6][0], b[6][0], a[6][1], b[6][1], - a[6][2], b[6][2]), - 1e-10); - Assertions.assertTrue(Double.isNaN(LinearCombination.value(a[6][0], b[6][0], - a[6][1], b[6][1], - a[6][2], b[6][2], - a[6][3], b[6][3]))); - Assertions.assertTrue(Double.isNaN(LinearCombination.value(a[6], b[6]))); - - Assertions.assertTrue(Double.isNaN(LinearCombination.value(a[7][0], b[7][0], - a[7][1], b[7][1]))); - Assertions.assertTrue(Double.isNaN(LinearCombination.value(a[7][0], b[7][0], - a[7][1], b[7][1], - a[7][2], b[7][2]))); - Assertions.assertTrue(Double.isNaN(LinearCombination.value(a[7][0], b[7][0], - a[7][1], b[7][1], - a[7][2], b[7][2], - a[7][3], b[7][3]))); - Assertions.assertTrue(Double.isNaN(LinearCombination.value(a[7], b[7]))); + a[6][2], b[6][2])); + Assertions.assertEquals(Double.NaN, + LinearCombination.value(a[6][0], b[6][0], + a[6][1], b[6][1], + a[6][2], b[6][2], + a[6][3], b[6][3])); + Assertions.assertEquals(Double.NaN, LinearCombination.value(a[6], b[6])); + + Assertions.assertEquals(Double.NaN, + LinearCombination.value(a[7][0], b[7][0], + a[7][1], b[7][1])); + Assertions.assertEquals(Double.NaN, + LinearCombination.value(a[7][0], b[7][0], + a[7][1], b[7][1], + a[7][2], b[7][2])); + Assertions.assertEquals(Double.NaN, + LinearCombination.value(a[7][0], b[7][0], + a[7][1], b[7][1], + a[7][2], b[7][2], + a[7][3], b[7][3])); + Assertions.assertEquals(Double.NaN, LinearCombination.value(a[7], b[7])); + + Assertions.assertEquals(Double.NEGATIVE_INFINITY, + LinearCombination.value(a[8][0], b[8][0], + a[8][1], b[8][1])); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, + LinearCombination.value(a[8][0], b[8][0], + a[8][1], b[8][1], + a[8][2], b[8][2])); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, + LinearCombination.value(a[8][0], b[8][0], + a[8][1], b[8][1], + a[8][2], b[8][2], + a[8][3], b[8][3])); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[8], b[8])); + + Assertions.assertEquals(-3, + LinearCombination.value(a[9][0], b[9][0], + a[9][1], b[9][1])); + Assertions.assertEquals(Double.POSITIVE_INFINITY, + LinearCombination.value(a[9][0], b[9][0], + a[9][1], b[9][1], + a[9][2], b[9][2])); + Assertions.assertEquals(Double.POSITIVE_INFINITY, + LinearCombination.value(a[9][0], b[9][0], + a[9][1], b[9][1], + a[9][2], b[9][2], + a[9][3], b[9][3])); + Assertions.assertEquals(Double.POSITIVE_INFINITY, LinearCombination.value(a[9], b[9])); + + Assertions.assertEquals(-Double.MAX_VALUE, + LinearCombination.value(a[10][0], b[10][0], + a[10][1], b[10][1])); + Assertions.assertEquals(-Double.MAX_VALUE, + LinearCombination.value(a[10][0], b[10][0], + a[10][1], b[10][1], + a[10][2], b[10][2])); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, + LinearCombination.value(a[10][0], b[10][0], + a[10][1], b[10][1], + a[10][2], b[10][2], + a[10][3], b[10][3])); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, LinearCombination.value(a[10], b[10])); + } + + /** + * This creates a scenario where the split product will overflow but the standard + * precision computation will not. The result is expected to be in extended precision, + * i.e. the method correctly detects and handles intermediate overflow. + * + * <p>Note: This test assumes that LinearCombination computes a split number + * using Dekker's method. This can result in the high part of the number being + * greater in magnitude than the the original number due to round-off in the split. + */ + @Test + void testOverflow() { + // Create a simple dot product that is different in high precision and has + // values that create a high part above the original number. This can be done using + // a mantissa with almost all bits set to 1. + final double x = Math.nextDown(2.0); + final double y = -Math.nextDown(x); + final double xxMxy = x * x + x * y; + final double xxMxyHighPrecision = LinearCombination.value(x, x, x, y); + Assertions.assertNotEquals(xxMxy, xxMxyHighPrecision, "High precision result should be different"); + + // Scale it close to max value. + // The current exponent is 0 so the combined scale must be 1023-1 as the + // partial product x*x and x*y have an exponent 1 higher + Assertions.assertEquals(0, Math.getExponent(x)); + Assertions.assertEquals(0, Math.getExponent(y)); + + final double a1 = Math.scalb(x, 1022 - 30); + final double b1 = Math.scalb(x, 30); + final double a2 = a1; + final double b2 = Math.scalb(y, 30); + // Verify low precision result is scaled and finite + final double sxxMxy = Math.scalb(xxMxy, 1022); + Assertions.assertEquals(sxxMxy, a1 * b1 + a2 * b2); + Assertions.assertTrue(Double.isFinite(sxxMxy)); + + // High precision result using Dekker's multiplier. + final double m = (1 << 27) + 1; + // First demonstrate that Dekker's split will create overflow in the high part. + double c; + c = a1 * m; + final double ha1 = c - (c - a1); + c = b1 * m; + final double hb1 = c - (c - b1); + c = a2 * m; + final double ha2 = c - (c - a2); + c = b2 * m; + final double hb2 = c - (c - b2); + Assertions.assertTrue(Double.isFinite(ha1)); + Assertions.assertTrue(Double.isFinite(hb1)); + Assertions.assertTrue(Double.isFinite(ha2)); + Assertions.assertTrue(Double.isFinite(hb2)); + // High part should be bigger in magnitude + Assertions.assertTrue(Math.abs(ha1) > Math.abs(a1)); + Assertions.assertTrue(Math.abs(hb1) > Math.abs(b1)); + Assertions.assertTrue(Math.abs(ha2) > Math.abs(a2)); + Assertions.assertTrue(Math.abs(hb2) > Math.abs(b2)); + Assertions.assertEquals(Double.POSITIVE_INFINITY, ha1 * hb1, "Expected split high part to overflow"); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, ha2 * hb2, "Expected split high part to overflow"); + + // LinearCombination should detect and handle intermediate overflow and return the + // high precision result. + final double expected = Math.scalb(xxMxyHighPrecision, 1022); + Assertions.assertEquals(expected, LinearCombination.value(a1, b1, a2, b2)); + Assertions.assertEquals(expected, LinearCombination.value(a1, b1, a2, b2, 0, 0)); + Assertions.assertEquals(expected, LinearCombination.value(a1, b1, a2, b2, 0, 0, 0, 0)); + Assertions.assertEquals(expected, LinearCombination.value(new double[] {a1, a2}, new double[] {b1, b2})); } } diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 0f93d3b..d58be94 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -68,6 +68,11 @@ Apache Commons Numbers 1.0-beta1 contains the following library modules: commons-numbers-quaternion (requires Java 8+) commons-numbers-rootfinder (requires Java 8+) "> + <action dev="aherbert" type="update" issue="NUMBERS-142" due-to="Alex Herbert"> + "LinearCombination": Update to use the dot2s algorithm. Avoids construction of an + intermediate array for array dot products. Update the hi-lo splitting algorithm + to use Dekker's split to ensure the product round-off is computed to exact precision. + </action> <action dev="aherbert" type="fix" issue="NUMBERS-150" due-to="Jin Xu"> "Fraction/BigFraction": Fixed pow(int) to handle Integer.MIN_VALUE and throw ArithmeticException for negative exponents to a fraction of zero.