This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-collections.git

commit bbee9fbd9b7d4a392ea2a43e8413065195d1823a
Author: Alex Herbert <aherb...@apache.org>
AuthorDate: Wed Mar 18 10:49:15 2020 +0000

    Update Hasher.Builder.
    
    Add default methods to add a CharSequence.
    
    Make it clear each object added to the Builder should represent an
    entire item.
    
    Document that build() should reset the builder for future use.
---
 .../bloomfilter/hasher/DynamicHasher.java          |  34 +++----
 .../collections4/bloomfilter/hasher/Hasher.java    |  44 ++++++--
 .../bloomfilter/HasherBloomFilterTest.java         |   4 +-
 .../hasher/DynamicHasherBuilderTest.java           |  60 +++++++----
 .../bloomfilter/hasher/DynamicHasherTest.java      |   7 +-
 .../bloomfilter/hasher/HasherBuilderTest.java      | 111 +++++++++++++++++++++
 6 files changed, 207 insertions(+), 53 deletions(-)

diff --git 
a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
 
b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
index a4fcf6a..f7aa5b6 100644
--- 
a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
+++ 
b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasher.java
@@ -16,7 +16,7 @@
  */
 package org.apache.commons.collections4.bloomfilter.hasher;
 
-import java.nio.charset.StandardCharsets;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.NoSuchElementException;
@@ -35,7 +35,7 @@ public class DynamicHasher implements Hasher {
     public static class Builder implements Hasher.Builder {
 
         /**
-         * The list of byte[] that are to be hashed.
+         * The list of items (each as a byte[]) that are to be hashed.
          */
         private final List<byte[]> buffers;
 
@@ -54,35 +54,31 @@ public class DynamicHasher implements Hasher {
             this.buffers = new ArrayList<>();
         }
 
-        /**
-         * Builds the hasher.
-         *
-         * @return A DynamicHasher with the specified name, function and 
buffers.
-         */
         @Override
         public DynamicHasher build() throws IllegalArgumentException {
-            return new DynamicHasher(function, buffers);
+            // Assumes the hasher will create a copy of the buffers
+            final DynamicHasher hasher = new DynamicHasher(function, buffers);
+            // Reset for further use
+            buffers.clear();
+            return hasher;
         }
 
         @Override
-        public final Builder with(final byte property) {
-            return with(new byte[] {property});
+        public final DynamicHasher.Builder with(final byte[] property) {
+            buffers.add(property);
+            return this;
         }
 
         @Override
-        public final Builder with(final byte[] property) {
-            buffers.add(property);
+        public DynamicHasher.Builder with(CharSequence item, Charset charset) {
+            Hasher.Builder.super.with(item, charset);
             return this;
         }
 
-        /**
-         * {@inheritDoc}
-         *
-         * <p>The string is converted to a byte array using the UTF-8 
Character set.
-         */
         @Override
-        public final Builder with(final String property) {
-            return with(property.getBytes(StandardCharsets.UTF_8));
+        public DynamicHasher.Builder withUnencoded(CharSequence item) {
+            Hasher.Builder.super.withUnencoded(item);
+            return this;
         }
     }
 
diff --git 
a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java 
b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
index 5816712..8f5d5c2 100644
--- 
a/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
+++ 
b/src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Hasher.java
@@ -16,6 +16,7 @@
  */
 package org.apache.commons.collections4.bloomfilter.hasher;
 
+import java.nio.charset.Charset;
 import java.util.PrimitiveIterator;
 
 /**
@@ -46,39 +47,62 @@ public interface Hasher {
 
     /**
      * A builder to build a hasher.
+     *
+     * <p>A hasher represents one or more items of arbitrary byte size. The 
builder
+     * contains methods to collect byte representations of items. Each method 
to add
+     * to the builder will add an entire item to the final hasher created by 
the
+     * {@link #build()} method.
+     *
      * @since 4.5
      */
     interface Builder {
 
         /**
-         * Builds the hasher.
+         * Builds the hasher from all the items.
+         *
+         * <p>This method will clear the builder for future use.
+         *
          * @return the fully constructed hasher
          */
         Hasher build();
 
         /**
-         * Adds a byte to the hasher.
+         * Adds a byte array item to the hasher.
          *
-         * @param property the byte to add
+         * @param item the item to add
          * @return a reference to this object
          */
-        Builder with(byte property);
+        Builder with(byte[] item);
 
         /**
-         * Adds an array of bytes to the hasher.
+         * Adds a character sequence item to the hasher using the specified 
{@code charset}
+         * encoding.
          *
-         * @param property the array of bytes to add
+         * @param item the item to add
+         * @param charset the character set
          * @return a reference to this object
          */
-        Builder with(byte[] property);
+        default Builder with(CharSequence item, Charset charset) {
+            return with(item.toString().getBytes(charset));
+        }
 
         /**
-         * Adds a string to the hasher.
+         * Adds a character sequence item to the hasher. Each 16-bit character 
is
+         * converted to 2 bytes using little-endian order.
          *
-         * @param property the string to add
+         * @param item the item to add
          * @return a reference to this object
          */
-        Builder with(String property);
+        default Builder withUnencoded(CharSequence item) {
+            int length = item.length();
+            final byte[] bytes = new byte[length * 2];
+            for (int i = 0; i < length; i++) {
+                final char ch = item.charAt(i);
+                bytes[i * 2] = (byte) ch;
+                bytes[i * 2 + 1] = (byte) (ch >>> 8);
+            }
+            return with(bytes);
+        }
     }
 
     /**
diff --git 
a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
 
b/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
index 0ed0cc1..e9b63ba 100644
--- 
a/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
+++ 
b/src/test/java/org/apache/commons/collections4/bloomfilter/HasherBloomFilterTest.java
@@ -25,7 +25,7 @@ import 
org.apache.commons.collections4.bloomfilter.hasher.Shape;
 import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
 import org.junit.Assert;
 import org.junit.Test;
-
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.PrimitiveIterator.OfInt;
 
@@ -40,7 +40,7 @@ public class HasherBloomFilterTest extends 
AbstractBloomFilterTest {
     @Test
     public void constructorTest_NonStatic() {
         final Shape shape = new Shape(new MD5Cyclic(), 3, 72, 17);
-        final DynamicHasher hasher = new DynamicHasher.Builder(new 
MD5Cyclic()).with("Hello").build();
+        final DynamicHasher hasher = new DynamicHasher.Builder(new 
MD5Cyclic()).with("Hello", StandardCharsets.UTF_8).build();
         final HasherBloomFilter filter = createFilter(hasher, shape);
         final long[] lb = filter.getBits();
         assertEquals(2, lb.length);
diff --git 
a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
 
b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
index 0fcf49e..94e685c 100644
--- 
a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
+++ 
b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherBuilderTest.java
@@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.nio.charset.StandardCharsets;
 import java.util.PrimitiveIterator.OfInt;
 
 import org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic;
@@ -32,16 +33,18 @@ import org.junit.Test;
 public class DynamicHasherBuilderTest {
 
     private DynamicHasher.Builder builder;
-    private final Shape shape = new Shape(new MD5Cyclic(), 1, 
Integer.MAX_VALUE, 1);
+    private HashFunction hf = new MD5Cyclic();
+    private final Shape shape = new Shape(hf, 1, 345, 1);
+    private String testString = HasherBuilderTest.getExtendedString();
 
     /**
-     * Tests that hashing a byte works as expected.
+     * Tests that hashing a byte array works as expected.
      */
     @Test
-    public void buildTest_byte() {
-        final DynamicHasher hasher = builder.with((byte) 0x1).build();
-
-        final int expected = 1483089307;
+    public void buildTest_byteArray() {
+        final byte[] bytes = testString.getBytes();
+        final DynamicHasher hasher = builder.with(bytes).build();
+        final int expected = (int) Math.floorMod(hf.apply(bytes, 0), 
shape.getNumberOfBits());
 
         final OfInt iter = hasher.iterator(shape);
 
@@ -51,29 +54,30 @@ public class DynamicHasherBuilderTest {
     }
 
     /**
-     * Tests that hashing a byte array works as expected.
+     * Tests that an empty hasher works as expected.
      */
     @Test
-    public void buildTest_byteArray() {
-        final DynamicHasher hasher = builder.with("Hello".getBytes()).build();
-        final int expected = 1519797563;
+    public void buildTest_Empty() {
+        final DynamicHasher hasher = builder.build();
 
         final OfInt iter = hasher.iterator(shape);
 
-        assertTrue(iter.hasNext());
-        assertEquals(expected, iter.nextInt());
         assertFalse(iter.hasNext());
     }
 
     /**
-     * Tests that an empty hasher works as expected.
+     * Tests that hashing a string works as expected.
      */
     @Test
-    public void buildTest_Empty() {
-        final DynamicHasher hasher = builder.build();
+    public void buildTest_String() {
+        final byte[] bytes = testString.getBytes(StandardCharsets.UTF_8);
+        final DynamicHasher hasher = builder.with(testString, 
StandardCharsets.UTF_8).build();
+        final int expected = (int) Math.floorMod(hf.apply(bytes, 0), 
shape.getNumberOfBits());
 
         final OfInt iter = hasher.iterator(shape);
 
+        assertTrue(iter.hasNext());
+        assertEquals(expected, iter.nextInt());
         assertFalse(iter.hasNext());
     }
 
@@ -81,9 +85,10 @@ public class DynamicHasherBuilderTest {
      * Tests that hashing a string works as expected.
      */
     @Test
-    public void buildTest_String() {
-        final DynamicHasher hasher = builder.with("Hello").build();
-        final int expected = 1519797563;
+    public void buildTest_UnencodedString() {
+        final byte[] bytes = testString.getBytes(StandardCharsets.UTF_16LE);
+        final DynamicHasher hasher = builder.withUnencoded(testString).build();
+        final int expected = (int) Math.floorMod(hf.apply(bytes, 0), 
shape.getNumberOfBits());
 
         final OfInt iter = hasher.iterator(shape);
 
@@ -93,10 +98,27 @@ public class DynamicHasherBuilderTest {
     }
 
     /**
+     * Tests that build resets the builder.
+     */
+    @Test
+    public void buildResetTest() {
+        builder.with(new byte[] {123});
+        final OfInt iter = builder.build().iterator(shape);
+
+        assertTrue(iter.hasNext());
+        iter.next();
+        assertFalse(iter.hasNext());
+
+        // Nothing added since last build so it should be an empty hasher
+        final OfInt iter2 = builder.build().iterator(shape);
+        assertFalse(iter2.hasNext());
+    }
+
+    /**
      * Sets up the builder for testing.
      */
     @Before
     public void setup() {
-        builder = new DynamicHasher.Builder(new MD5Cyclic());
+        builder = new DynamicHasher.Builder(hf);
     }
 }
diff --git 
a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
 
b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
index e79f6b7..317bf95 100644
--- 
a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
+++ 
b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/DynamicHasherTest.java
@@ -21,6 +21,7 @@ import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.nio.charset.StandardCharsets;
 import java.util.NoSuchElementException;
 import java.util.PrimitiveIterator.OfInt;
 
@@ -80,7 +81,7 @@ public class DynamicHasherTest {
 
         final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 
2, 49, 24, 15, 62};
 
-        final Hasher hasher = builder.with("Hello").build();
+        final Hasher hasher = builder.with("Hello", 
StandardCharsets.UTF_8).build();
 
         final OfInt iter = hasher.iterator(shape);
 
@@ -99,7 +100,7 @@ public class DynamicHasherTest {
         final int[] expected = {6, 69, 44, 19, 10, 57, 48, 23, 70, 61, 36, 11, 
2, 49, 24, 15, 62, 1, 63, 53, 43, 17, 7, 69,
             59, 49, 39, 13, 3, 65, 55, 45, 35, 25};
 
-        final Hasher hasher = builder.with("Hello").with("World").build();
+        final Hasher hasher = builder.with("Hello", 
StandardCharsets.UTF_8).with("World", StandardCharsets.UTF_8).build();
 
         final OfInt iter = hasher.iterator(shape);
 
@@ -122,7 +123,7 @@ public class DynamicHasherTest {
     @Test
     public void testGetBits_WrongShape() {
 
-        final Hasher hasher = builder.with("Hello").build();
+        final Hasher hasher = builder.with("Hello", 
StandardCharsets.UTF_8).build();
 
         try {
             hasher.iterator(new Shape(testFunction, 3, 72, 17));
diff --git 
a/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherBuilderTest.java
 
b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherBuilderTest.java
new file mode 100644
index 0000000..767b54a
--- /dev/null
+++ 
b/src/test/java/org/apache/commons/collections4/bloomfilter/hasher/HasherBuilderTest.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter.hasher;
+
+import org.apache.commons.collections4.bloomfilter.hasher.Hasher.Builder;
+import org.apache.commons.lang3.NotImplementedException;
+import org.junit.Assert;
+import org.junit.Test;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+
+/**
+ * Tests the
+ * {@link org.apache.commons.collections4.bloomfilter.hasher.Hasher.Builder 
Hasher.Builder}.
+ */
+public class HasherBuilderTest {
+
+    /**
+     * Simple class to collect byte[] items added to the builder.
+     */
+    private static class TestBuilder implements Hasher.Builder {
+        ArrayList<byte[]> items = new ArrayList<>();
+
+        @Override
+        public Hasher build() {
+            throw new NotImplementedException("Not required");
+        }
+
+        @Override
+        public Builder with(byte[] item) {
+            items.add(item);
+            return this;
+        }
+    }
+
+    /**
+     * Tests that adding CharSequence items works correctly.
+     */
+    @Test
+    public void withCharSequenceTest() {
+        final String ascii = "plain";
+        final String extended = getExtendedString();
+        for (final String s : new String[] {ascii, extended}) {
+            for (final Charset cs : new Charset[] {
+                StandardCharsets.ISO_8859_1, StandardCharsets.UTF_8, 
StandardCharsets.UTF_16
+            }) {
+                TestBuilder builder = new TestBuilder();
+                builder.with(s, cs);
+                Assert.assertArrayEquals(s.getBytes(cs), builder.items.get(0));
+            }
+        }
+    }
+
+    /**
+     * Tests that adding unencoded CharSequence items works correctly.
+     */
+    @Test
+    public void withUnecodedCharSequenceTest() {
+        final String ascii = "plain";
+        final String extended = getExtendedString();
+        for (final String s : new String[] {ascii, extended}) {
+            final TestBuilder builder = new TestBuilder();
+            builder.withUnencoded(s);
+            final byte[] encoded = builder.items.get(0);
+            final char[] original = s.toCharArray();
+            // Should be twice the length
+            Assert.assertEquals(original.length * 2, encoded.length);
+            // Should be little endian (lower bits first)
+            final CharBuffer buffer = ByteBuffer.wrap(encoded)
+                                                
.order(ByteOrder.LITTLE_ENDIAN).asCharBuffer();
+            for (int i = 0; i < original.length; i++) {
+                Assert.assertEquals(original[i], buffer.get(i));
+            }
+        }
+    }
+
+    /**
+     * Gets a string with non-standard characters.
+     *
+     * @return the extended string
+     */
+    static String getExtendedString() {
+        final char[] data = {'e', 'x', 't', 'e', 'n', 'd', 'e', 'd', ' ',
+            // Add some characters that are non standard
+            // non-ascii
+            0xCA98,
+            // UTF-16 surrogate pair
+            0xD803, 0xDE6D
+            // Add other cases here ...
+        };
+        return String.valueOf(data);
+    }
+}

Reply via email to