This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-collections.git
commit 9de28a7b622972df1b32311ae605dc97761499f2 Author: Alex Herbert <aherb...@apache.org> AuthorDate: Sun Mar 15 21:45:05 2020 +0000 Updated the BloomFilter javadoc. Remove trailing periods on parameters and arguments. Remove reference to LongBuffer. Clarify what the long[] represents in 'long[] getBits()'. Clarify cardinality using (number of enabled bits). Rearrange BloomFilter interface methods to functional order. The order is: - Query operations - Modification operations - Counting operations Improve javadoc for BloomFilter contains with additional information for what 'contains' means. Update exception message for contains/merge/add/subtract to be consistent. --- .../collections4/bloomfilter/BloomFilter.java | 130 +++++++++++++-------- .../bloomfilter/CountingBloomFilter.java | 112 ++++++++++-------- 2 files changed, 144 insertions(+), 98 deletions(-) diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java index 63adc2b..af43ddd 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java @@ -26,100 +26,128 @@ import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher; */ public interface BloomFilter { + // Query Operations + /** - * Performs a logical "AND" with the other Bloom filter and returns the cardinality of - * the result. + * Gets the shape of this filter. * - * @param other the other Bloom filter. - * @return the cardinality of the result of {@code ( this AND other )}. + * @return the shape of this filter */ - int andCardinality(BloomFilter other); + Shape getShape(); /** - * Gets the cardinality of this Bloom filter. - * <p>This is also known as the Hamming value.</p> + * Gets an array of little-endian long values representing the bits of this filter. * - * @return the cardinality (number of enabled bits) in this filter. + * <p>The returned array will have length {@code ceil(m / 64)} where {@code m} is the + * number of bits in the filter and {@code ceil} is the ceiling function. + * Bits 0-63 are in the first long. A value of 1 at a bit position indicates the bit + * index is enabled. + * + * @return the {@code long[]} representation of this filter */ - int cardinality(); + long[] getBits(); /** - * Performs a contains check. Effectively this AND other == other. + * Creates a StaticHasher that contains the indexes of the bits that are on in this + * filter. * - * @param other the Other Bloom filter. - * @return true if this filter matches the other. + * @return a StaticHasher for that produces this Bloom filter */ - boolean contains(BloomFilter other); + StaticHasher getHasher(); /** - * Performs a contains check against a decomposed Bloom filter. The shape must match - * the shape of this filter. The hasher provides bit indexes to check for. Effectively - * decomposed AND this == decomposed. + * Returns {@code true} if this filter contains the specified filter. Specifically this + * returns {@code true} if this filter is enabled for all bits that are enabled in the + * {@code other} filter. Using the bit representations this is + * effectively {@code (this AND other) == other}. * - * @param hasher The hasher containing the bits to check. - * @return true if this filter contains the other. - * @throws IllegalArgumentException if the shape argument does not match the shape of - * this filter, or if the hasher is not the specified one + * @param other the other Bloom filter + * @return true if this filter is enabled for all enabled bits in the other filter + * @throws IllegalArgumentException if the shape of the other filter does not match + * the shape of this filter */ - boolean contains(Hasher hasher); + boolean contains(BloomFilter other); /** - * Gets an array of little-endian long values representing the on bits of this filter. - * bits 0-63 are in the first long. + * Returns {@code true} if this filter contains the specified decomposed Bloom filter. + * Specifically this returns {@code true} if this filter is enabled for all bit indexes + * identified by the {@code hasher}. Using the bit representations this is + * effectively {@code (this AND hasher) == hasher}. * - * @return the LongBuffer representation of this filter. + * @param hasher the hasher to provide the indexes + * @return true if this filter is enabled for all bits specified by the hasher + * @throws IllegalArgumentException if the hasher cannot generate indices for the shape of + * this filter */ - long[] getBits(); + boolean contains(Hasher hasher); + + // Modification Operations /** - * Creates a StaticHasher that contains the indexes of the bits that are on in this - * filter. + * Merges the specified Bloom filter into this Bloom filter. Specifically all bit indexes + * that are enabled in the {@code other} filter will be enabled in this filter. * - * @return a StaticHasher for that produces this Bloom filter. + * <p>Note: This method should return {@code true} even if no additional bit indexes were + * enabled. A {@code false} result indicates that this filter is not ensured to contain + * the {@code other} Bloom filter. + * + * @param other the other Bloom filter + * @return true if the merge was successful + * @throws IllegalArgumentException if the shape of the other filter does not match + * the shape of this filter */ - StaticHasher getHasher(); + boolean merge(BloomFilter other); /** - * Gets the shape of this filter. + * Merges the specified decomposed Bloom filter into this Bloom filter. Specifically all + * bit indexes that are identified by the {@code hasher} will be enabled in this filter. * - * @return The shape of this filter. + * <p>Note: This method should return {@code true} even if no additional bit indexes were + * enabled. A {@code false} result indicates that this filter is not ensured to contain + * the specified decomposed Bloom filter. + * + * @param hasher the hasher to provide the indexes + * @return true if the merge was successful + * @throws IllegalArgumentException if the hasher cannot generate indices for the shape of + * this filter */ - Shape getShape(); + boolean merge(Hasher hasher); + + // Counting Operations /** - * Merges the other Bloom filter into this one. + * Gets the cardinality (number of enabled bits) of this Bloom filter. * - * @param other the other Bloom filter. - * @return true if the merge was successful + * <p>This is also known as the Hamming value.</p> + * + * @return the cardinality of this filter */ - boolean merge(BloomFilter other); + int cardinality(); /** - * Merges the decomposed Bloom filter defined by the hasher into this Bloom - * filter. The hasher provides an iterator of bit indexes to enable. + * Performs a logical "AND" with the other Bloom filter and returns the cardinality + * (number of enabled bits) of the result. * - * @param hasher the hasher to provide the indexes. - * @return true if the merge was successful - * @throws IllegalArgumentException if the shape argument does not match the shape of - * this filter, or if the hasher is not the specified one + * @param other the other Bloom filter + * @return the cardinality of the result of {@code (this AND other)} */ - boolean merge(Hasher hasher); + int andCardinality(BloomFilter other); /** - * Performs a logical "OR" with the other Bloom filter and returns the cardinality of - * the result. + * Performs a logical "OR" with the other Bloom filter and returns the cardinality + * (number of enabled bits) of the result. * - * @param other the other Bloom filter. - * @return the cardinality of the result of {@code ( this OR other )}. + * @param other the other Bloom filter + * @return the cardinality of the result of {@code (this OR other)} */ int orCardinality(BloomFilter other); /** - * Performs a logical "XOR" with the other Bloom filter and returns the cardinality of - * the result. + * Performs a logical "XOR" with the other Bloom filter and returns the cardinality + * (number of enabled bits) of the result. * - * @param other the other Bloom filter. - * @return the cardinality of the result of {@code ( this XOR other )} + * @param other the other Bloom filter + * @return the cardinality of the result of {@code (this XOR other)} */ int xorCardinality(BloomFilter other); } diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java index adce2a5..0c414eb 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java @@ -72,31 +72,76 @@ public interface CountingBloomFilter extends BloomFilter { void accept(int index, int count); } + // Query Operations + + /** + * Returns true if the internal state is valid. This flag is a warning that an addition or + * subtraction of counts from this filter resulted in an invalid count for one or more + * indexes. For example this may occur if a count for an index was + * set to negative following a subtraction operation, or overflows an {@code int} following an + * addition operation. + * + * <p>A counting Bloom filter that has an invalid state is no longer ensured to function + * identically to a standard Bloom filter instance that is the merge of all the Bloom filters + * that have been added to and not later subtracted from this counting Bloom filter. + * + * <p>Note: The change to an invalid state may or may not be reversible. Implementations + * are expected to document their policy on recovery from an addition or removal operation + * that generated an invalid state. + * + * @return true if the state is valid + */ + boolean isValid(); + + /** + * Performs the given action for each {@code <index, count>} pair where the count is non-zero. + * Any exceptions thrown by the action are relayed to the caller. + * + * @param action the action to be performed for each non-zero bit count + * @throws NullPointerException if the specified action is null + */ + void forEachCount(BitCountConsumer action); + + // Modification Operations + /** - * {@inheritDoc} + * Merges the specified Bloom filter into this Bloom filter. Specifically all counts for + * indexes that are enabled in the {@code other} filter will be incremented by 1. * - * <p>Note: If the other filter is a counting Bloom filter the index counts are ignored. - * All counts for the indexes identified by the other filter will be incremented by 1. + * <p>Note: If the other filter is a counting Bloom filter the index counts are ignored; only + * the enabled indexes are used. * * <p>This method will return true if the filter is valid after the operation. + * + * @param other {@inheritDoc} + * @return true if the merge was successful and the state is valid + * @throws IllegalArgumentException {@inheritDoc} + * @see #isValid() */ @Override boolean merge(BloomFilter other); /** - * {@inheritDoc} - * - * <p>Note: If the hasher contains duplicate bit indexes these are ignored. - * All counts for the indexes identified by the other filter will be incremented by 1. + * Merges the specified decomposed Bloom filter into this Bloom filter. Specifically all + * counts for the <em>distinct</em> indexes that are identified by the {@code hasher} will + * be incremented by 1. If the {@code hasher} contains duplicate bit indexes these are ignored. * * <p>This method will return true if the filter is valid after the operation. + * + * @param hasher {@inheritDoc} + * @return true if the merge was successful and the state is valid + * @throws IllegalArgumentException {@inheritDoc} + * @see #isValid() */ @Override - boolean merge(Hasher other); + boolean merge(Hasher hasher); /** - * Removes the other Bloom filter from this one. - * All counts for the indexes identified by the other filter will be decremented by 1. + * Removes the specified Bloom filter from this Bloom filter. Specifically + * all counts for the indexes identified by the {@code other} filter will be decremented by 1. + * + * <p>Note: If the other filter is a counting Bloom filter the index counts are ignored; only + * the enabled indexes are used. * * <p>This method will return true if the filter is valid after the operation. * @@ -105,13 +150,14 @@ public interface CountingBloomFilter extends BloomFilter { * @throws IllegalArgumentException if the shape of the other filter does not match * the shape of this filter * @see #isValid() + * @see #subtract(CountingBloomFilter) */ boolean remove(BloomFilter other); /** - * Removes the decomposed Bloom filter defined by the hasher from this Bloom filter. - * All counts for the indexes identified by the hasher will be decremented by 1. - * Duplicate indexes should be ignored. + * Removes the specified decomposed Bloom filter from this Bloom filter. Specifically + * all counts for the <em>distinct</em> indexes identified by the {@code hasher} will be + * decremented by 1. If the {@code hasher} contains duplicate bit indexes these are ignored. * * <p>This method will return true if the filter is valid after the operation. * @@ -124,9 +170,9 @@ public interface CountingBloomFilter extends BloomFilter { boolean remove(Hasher hasher); /** - * Adds the other counting Bloom filter to this one. - * All counts for the indexes identified by the other filter will be incremented by their - * corresponding counts in the other filter. + * Adds the specified counting Bloom filter to this Bloom filter. Specifically + * all counts for the indexes identified by the {@code other} filter will be incremented + * by their corresponding counts in the {@code other} filter. * * <p>This method will return true if the filter is valid after the operation. * @@ -139,9 +185,9 @@ public interface CountingBloomFilter extends BloomFilter { boolean add(CountingBloomFilter other); /** - * Subtracts the other counting Bloom filter from this one. - * All counts for the indexes identified by the other filter will be decremented by their - * corresponding counts in the other filter. + * Adds the specified counting Bloom filter to this Bloom filter. Specifically + * all counts for the indexes identified by the {@code other} filter will be decremented + * by their corresponding counts in the {@code other} filter. * * <p>This method will return true if the filter is valid after the operation. * @@ -152,32 +198,4 @@ public interface CountingBloomFilter extends BloomFilter { * @see #isValid() */ boolean subtract(CountingBloomFilter other); - - /** - * Returns true if the internal state is valid. This flag is a warning that an addition or - * subtraction of counts from this filter resulted in an invalid count for one or more - * indexes. For example this may occur if a count for an index was - * set to negative following a subtraction operation, or overflows an {@code int} following an - * addition operation. - * - * <p>A counting Bloom filter that has an invalid state is no longer ensured to function - * identically to a standard Bloom filter instance that is the merge of all the Bloom filters - * that have been added to and not later subtracted from this counting Bloom filter. - * - * <p>Note: The change to an invalid state may or may not be reversible. Implementations - * are expected to document their policy on recovery from an addition or removal operation - * that generated an invalid state. - * - * @return true if the state is valid - */ - boolean isValid(); - - /** - * Performs the given action for each {@code <index, count>} pair where the count is non-zero. - * Any exceptions thrown by the action are relayed to the caller. - * - * @param action the action to be performed for each non-zero bit count - * @throws NullPointerException if the specified action is null - */ - void forEachCount(BitCountConsumer action); }