This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-lang.git
The following commit(s) were added to refs/heads/master by this push: new c2260f094 [LANG-1772] Restrict size of cache to prevent overflow errors (#1379) c2260f094 is described below commit c2260f094d78c236bb8a2e3e153d422c7d7ad6bf Author: James Winters <ja...@jghd.co.uk> AuthorDate: Sun May 25 07:53:37 2025 +1000 [LANG-1772] Restrict size of cache to prevent overflow errors (#1379) * LANG-1772 restrict size of cache to prevent overflow errors * LANG-1772 Adding a largeheap maven profile. Skipping the testHugeStrings method in all other profiles. * LANG-1772 Adding javadoc as per request. Also moving max cache size inside the CachedRandomBits constructor - also checking if the padding produces overflow. No longer using an arbitrary value but being more precise. * LANG-1772 Suggestions from PR implemented * LANG-1772 Using a long for the intermediate calculation and restricting the result to MAX_INT/5, also restricting max cache length to MAX_INT/3, there are now no opportunities for overflow. The test checks at the boundary condition * Close HTML tag * LANG-1772 Introduced many constants, and attempted to add comprehensive documentation around the nextBits method and the size allocation for the cache * Update src/main/java/org/apache/commons/lang3/RandomStringUtils.java You're right, should be outside the min Co-authored-by: Piotr P. Karwasz <pi...@github.copernik.eu> --------- Co-authored-by: James Winters <jwint...@atlassian.com> Co-authored-by: Gary Gregory <garydgreg...@users.noreply.github.com> Co-authored-by: Piotr P. Karwasz <pi...@github.copernik.eu> --- pom.xml | 14 ++++- .../org/apache/commons/lang3/CachedRandomBits.java | 60 +++++++++++++++++----- .../apache/commons/lang3/RandomStringUtils.java | 15 +++++- .../commons/lang3/RandomStringUtilsTest.java | 13 +++++ 4 files changed, 87 insertions(+), 15 deletions(-) diff --git a/pom.xml b/pom.xml index f7cae3ab1..5d320b70a 100644 --- a/pom.xml +++ b/pom.xml @@ -110,7 +110,10 @@ </distributionManagement> <properties> - <argLine>-Xmx512m</argLine> + <heapSize>-Xmx512m</heapSize> + <extraArgs/> + <systemProperties/> + <argLine>${heapSize} ${extraArgs} ${systemProperties}</argLine> <project.build.sourceEncoding>ISO-8859-1</project.build.sourceEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <!-- project.build.outputTimestamp is managed by Maven plugins, see https://maven.apache.org/guides/mini/guide-reproducible-builds.html --> @@ -460,7 +463,7 @@ <properties> <!-- LANG-1265: allow tests to access private fields/methods of java.base classes via reflection --> <!-- LANG-1667: allow tests to access private fields/methods of java.base/java.util such as ArrayList via reflection --> - <argLine>-Xmx512m --add-opens java.base/java.lang.reflect=ALL-UNNAMED --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/java.util=ALL-UNNAMED --add-opens java.base/java.time=ALL-UNNAMED --add-opens java.base/java.time.chrono=ALL-UNNAMED</argLine> + <extraArgs>--add-opens java.base/java.lang.reflect=ALL-UNNAMED --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/java.util=ALL-UNNAMED --add-opens java.base/java.time=ALL-UNNAMED --add-opens java.base/java.time.chrono=ALL-UNNAMED</extraArgs> </properties> </profile> <profile> @@ -522,6 +525,13 @@ </plugins> </build> </profile> + <profile> + <id>largeheap</id> + <properties> + <heapSize>-Xmx1024m</heapSize> + <systemProperties>-Dtest.large.heap=true</systemProperties> + </properties> + </profile> </profiles> <developers> <developer> diff --git a/src/main/java/org/apache/commons/lang3/CachedRandomBits.java b/src/main/java/org/apache/commons/lang3/CachedRandomBits.java index 92b561f38..5b90a0ecb 100644 --- a/src/main/java/org/apache/commons/lang3/CachedRandomBits.java +++ b/src/main/java/org/apache/commons/lang3/CachedRandomBits.java @@ -52,6 +52,20 @@ final class CachedRandomBits { */ private int bitIndex; + /** + * The maximum size of the cache. + * + * <p> + * This is to prevent the possibility of overflow in the {@code if (bitIndex >> 3 >= cache.length)} in the {@link #nextBits(int)} method. + * </p> + */ + private static final int MAX_CACHE_SIZE = Integer.MAX_VALUE >> 3; + /** Maximum number of bits that can be generated (size of an int) */ + private static final int MAX_BITS = 32; + /** Mask to extract the bit offset within a byte (0-7) */ + private static final int BIT_INDEX_MASK = 0x7; + /** Number of bits in a byte */ + private static final int BITS_PER_BYTE = 8; /** * Creates a new instance. * @@ -62,7 +76,7 @@ final class CachedRandomBits { if (cacheSize <= 0) { throw new IllegalArgumentException("cacheSize must be positive"); } - this.cache = new byte[cacheSize]; + this.cache = cacheSize <= MAX_CACHE_SIZE ? new byte[cacheSize] : new byte[MAX_CACHE_SIZE]; this.random = Objects.requireNonNull(random, "random"); this.random.nextBytes(this.cache); this.bitIndex = 0; @@ -71,28 +85,50 @@ final class CachedRandomBits { /** * Generates a random integer with the specified number of bits. * - * @param bits number of bits to generate, MUST be between 1 and 32 - * @return random integer with {@code bits} bits + * <p>This method efficiently generates random bits by using a byte cache and bit manipulation: + * <ul> + * <li>Uses a byte array cache to avoid frequent calls to the underlying random number generator</li> + * <li>Extracts bits from each byte using bit shifting and masking</li> + * <li>Handles partial bytes to avoid wasting random bits</li> + * <li>Accumulates bits until the requested number is reached</li> + * </ul> + * </p> + * + * @param bits number of bits to generate, MUST be between 1 and 32 (inclusive) + * @return random integer containing exactly the requested number of random bits + * @throws IllegalArgumentException if bits is not between 1 and 32 */ public int nextBits(final int bits) { - if (bits > 32 || bits <= 0) { - throw new IllegalArgumentException("number of bits must be between 1 and 32"); + if (bits > MAX_BITS || bits <= 0) { + throw new IllegalArgumentException("number of bits must be between 1 and " + MAX_BITS); } int result = 0; int generatedBits = 0; // number of generated bits up to now while (generatedBits < bits) { + // Check if we need to refill the cache + // Convert bitIndex to byte index by dividing by 8 (right shift by 3) if (bitIndex >> 3 >= cache.length) { - // we exhausted the number of bits in the cache - // this should only happen if the bitIndex is exactly matching the cache length - assert bitIndex == cache.length * 8; + // We exhausted the number of bits in the cache + // This should only happen if the bitIndex is exactly matching the cache length + assert bitIndex == cache.length * BITS_PER_BYTE; random.nextBytes(cache); bitIndex = 0; } - // generatedBitsInIteration is the number of bits that we will generate - // in this iteration of the while loop - final int generatedBitsInIteration = Math.min(8 - (bitIndex & 0x7), bits - generatedBits); + // Calculate how many bits we can extract from the current byte + // 1. Get current position within byte (0-7) using bitIndex & 0x7 + // 2. Calculate remaining bits in byte: 8 - (position within byte) + // 3. Take minimum of remaining bits in byte and bits still needed + final int generatedBitsInIteration = Math.min( + BITS_PER_BYTE - (bitIndex & BIT_INDEX_MASK), + bits - generatedBits); + // Shift existing result left to make room for new bits result = result << generatedBitsInIteration; - result |= cache[bitIndex >> 3] >> (bitIndex & 0x7) & (1 << generatedBitsInIteration) - 1; + // Extract and append new bits: + // 1. Get byte from cache (bitIndex >> 3 converts bit index to byte index) + // 2. Shift right by bit position within byte (bitIndex & 0x7) + // 3. Mask to keep only the bits we want ((1 << generatedBitsInIteration) - 1) + result |= cache[bitIndex >> 3] >> (bitIndex & BIT_INDEX_MASK) & ((1 << generatedBitsInIteration) - 1); + // Update counters generatedBits += generatedBitsInIteration; bitIndex += generatedBitsInIteration; } diff --git a/src/main/java/org/apache/commons/lang3/RandomStringUtils.java b/src/main/java/org/apache/commons/lang3/RandomStringUtils.java index a75c7e85d..6f415f679 100644 --- a/src/main/java/org/apache/commons/lang3/RandomStringUtils.java +++ b/src/main/java/org/apache/commons/lang3/RandomStringUtils.java @@ -98,6 +98,10 @@ public class RandomStringUtils { private static final int ASCII_A = 'A'; private static final int ASCII_z = 'z'; + private static final int CACHE_PADDING_BITS = 3; + private static final int BITS_TO_BYTES_DIVISOR = 5; + private static final int BASE_CACHE_SIZE_PADDING = 10; + /** * Gets the singleton instance based on {@link ThreadLocalRandom#current()}; <b>which is not cryptographically * secure</b>; use {@link #secure()} to use an algorithms/providers specified in the @@ -329,7 +333,16 @@ public static String random(int count, int start, int end, final boolean letters // Ideally the cache size depends on multiple factor, including the cost of generating x bytes // of randomness as well as the probability of rejection. It is however not easy to know // those values programmatically for the general case. - final CachedRandomBits arb = new CachedRandomBits((count * gapBits + 3) / 5 + 10, random); + // Calculate cache size: + // 1. Multiply count by bits needed per character (gapBits) + // 2. Add padding bits (3) to handle partial bytes + // 3. Divide by 5 to convert to bytes (normally this would be by 8, dividing by 5 allows for about 60% extra space) + // 4. Add base padding (10) to handle small counts efficiently + // 5. Ensure we don't exceed Integer.MAX_VALUE / 5 + 10 to provide a good balance between overflow prevention and + // making the cache extremely large + final long desiredCacheSize = ((long) count * gapBits + CACHE_PADDING_BITS) / BITS_TO_BYTES_DIVISOR + BASE_CACHE_SIZE_PADDING; + final int cacheSize = (int) Math.min(desiredCacheSize, Integer.MAX_VALUE / BITS_TO_BYTES_DIVISOR + BASE_CACHE_SIZE_PADDING); + final CachedRandomBits arb = new CachedRandomBits(cacheSize, random); while (count-- != 0) { // Generate a random value between start (included) and end (excluded) diff --git a/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java b/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java index 186358e09..0281ceece 100644 --- a/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java +++ b/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java @@ -29,8 +29,10 @@ import java.util.stream.Stream; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfSystemProperty; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; /** * Tests {@link RandomStringUtils}. @@ -38,6 +40,9 @@ public class RandomStringUtilsTest extends AbstractLangTest { private static final int LOOP_COUNT = 1_000; + /** Maximum safe value for count to avoid overflow: (21x + 3) / 5 + 10 < 0x0FFF_FFFF */ + private static final int MAX_SAFE_COUNT = 63_913_201; + static Stream<RandomStringUtils> randomProvider() { return Stream.of(RandomStringUtils.secure(), RandomStringUtils.secureStrong(), RandomStringUtils.insecure()); @@ -802,4 +807,12 @@ public void testRandomWithChars(final RandomStringUtils rsu) { assertNotEquals(r1, r3); assertNotEquals(r2, r3); } + + @ParameterizedTest + @ValueSource(ints = {MAX_SAFE_COUNT, MAX_SAFE_COUNT + 1}) + @EnabledIfSystemProperty(named = "test.large.heap", matches = "true") + public void testHugeStrings(final int expectedLength) { + final String hugeString = RandomStringUtils.random(expectedLength); + assertEquals(expectedLength, hugeString.length(), "hugeString.length() == expectedLength"); + } }