Author: bodewig Date: Sat Sep 30 16:38:05 2017 New Revision: 1810226 URL: http://svn.apache.org/viewvc?rev=1810226&view=rev Log: CODEC-241 add support for XXHash32
Added: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/digest/XXHash32.java (with props) commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java (with props) commons/proper/codec/trunk/src/test/resources/ commons/proper/codec/trunk/src/test/resources/bla.tar (with props) commons/proper/codec/trunk/src/test/resources/bla.tar.xz (with props) Modified: commons/proper/codec/trunk/src/changes/changes.xml Modified: commons/proper/codec/trunk/src/changes/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1810226&r1=1810225&r2=1810226&view=diff ============================================================================== --- commons/proper/codec/trunk/src/changes/changes.xml (original) +++ commons/proper/codec/trunk/src/changes/changes.xml Sat Sep 30 16:38:05 2017 @@ -45,6 +45,7 @@ The <action> type attribute can be add,u <release version="1.11" date="2017-MM-DD" description="Feature and fix release."> <!-- The first attribute below should be the issue id; makes it easier to navigate in the IDE outline --> + <action issue="CODEC-241" type="add">Add support for XXHash32</action> <action issue="CODEC-234" dev="ggregory" type="update" due-to="Christopher Schultz, Sebb">Base32.decode should support lowercase letters</action> <action issue="CODEC-233" dev="sebb" type="update" due-to="Yossi Tamari">Soundex should support more algorithm variants</action> <action issue="CODEC-145" dev="sebb" type="fix" due-to="Jesse Glick">Base64.encodeBase64String could better use newStringUsAscii (ditto encodeBase64URLSafeString)</action> Added: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/digest/XXHash32.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/digest/XXHash32.java?rev=1810226&view=auto ============================================================================== --- 
/**
 * Implementation of the xxhash32 hash algorithm.
 *
 * <p>Copied from Commons Compress 1.14
 * <a href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD">https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD</a></p>
 *
 * <p>Instances keep mutable, unsynchronized state and are therefore not thread-safe.</p>
 *
 * @see <a href="http://cyan4973.github.io/xxHash/">xxHash</a>
 * @since 1.11
 */
public class XXHash32 implements Checksum {

    /** Number of bytes consumed per mixing round (four 32-bit lanes). */
    private static final int BUF_SIZE = 16;
    private static final int ROTATE_BITS = 13;

    private static final int PRIME1 = (int) 2654435761L;
    private static final int PRIME2 = (int) 2246822519L;
    private static final int PRIME3 = (int) 3266489917L;
    private static final int PRIME4 = 668265263;
    private static final int PRIME5 = 374761393;

    private final byte[] oneByte = new byte[1];
    private final int[] state = new int[4];
    // Note: the code used to use ByteBuffer but the manual method is 50% faster
    // See: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/2f56fb5c
    private final byte[] buffer = new byte[BUF_SIZE];
    private final int seed;

    /** Total number of bytes seen, modulo 2^32 (the algorithm only mixes in the low 32 bits). */
    private int totalLen;
    /** Number of not yet processed bytes held in {@link #buffer}; always less than {@link #BUF_SIZE}. */
    private int pos;
    /** Set once at least one full 16 byte block has been mixed into {@link #state} since the last reset. */
    private boolean stateUpdated;

    /**
     * Creates an XXHash32 instance with a seed of 0.
     */
    public XXHash32() {
        this(0);
    }

    /**
     * Creates an XXHash32 instance.
     * @param seed the seed to use
     */
    public XXHash32(final int seed) {
        this.seed = seed;
        initializeState();
    }

    /**
     * Resets the checksum to the state it had directly after construction.
     */
    @Override
    public void reset() {
        initializeState();
        totalLen = 0;
        pos = 0;
        stateUpdated = false;
    }

    /**
     * Updates the checksum with a single byte.
     * @param b the byte to add; only the low eight bits are used
     */
    @Override
    public void update(final int b) {
        oneByte[0] = (byte) (b & 0xff);
        update(oneByte, 0, 1);
    }

    /**
     * Updates the checksum with a range of bytes.
     * @param b the array holding the bytes
     * @param off the offset of the first byte to add
     * @param len the number of bytes to add; values less than or equal to zero are ignored
     */
    @Override
    public void update(final byte[] b, int off, final int len) {
        if (len <= 0) {
            return;
        }
        totalLen += len;

        final int end = off + len;

        // Equivalent to (pos + len < BUF_SIZE) but still correct when
        // pos + len overflows for a len close to Integer.MAX_VALUE.
        if (pos + len - BUF_SIZE < 0) {
            System.arraycopy(b, off, buffer, pos, len);
            pos += len;
            return;
        }

        // Complete a partially filled buffer with the head of the new data.
        if (pos > 0) {
            final int size = BUF_SIZE - pos;
            System.arraycopy(b, off, buffer, pos, size);
            process(buffer, 0);
            off += size;
        }

        // Consume whole blocks straight from the caller's array.
        final int limit = end - BUF_SIZE;
        while (off <= limit) {
            process(b, off);
            off += BUF_SIZE;
        }

        // Keep the tail (fewer than BUF_SIZE bytes) for the next call or getValue().
        if (off < end) {
            pos = end - off;
            System.arraycopy(b, off, buffer, 0, pos);
        } else {
            pos = 0;
        }
    }

    /**
     * Returns the hash of all bytes added since construction or the last {@link #reset()}.
     * Calling this method does not modify the state, more data may be added afterwards.
     * @return the 32 bit hash as an unsigned value in the low 32 bits of the result
     */
    @Override
    public long getValue() {
        int hash;
        if (stateUpdated) {
            // At least one full block was mixed: merge the four lanes.
            // This must not be derived from totalLen, which would miss an input of
            // exactly BUF_SIZE bytes and breaks once the int counter wraps around.
            hash =
                rotateLeft(state[0], 1)
                + rotateLeft(state[1], 7)
                + rotateLeft(state[2], 12)
                + rotateLeft(state[3], 18);
        } else {
            // Fewer than BUF_SIZE bytes in total: state[2] still holds the plain seed.
            hash = state[2] + PRIME5;
        }
        hash += totalLen;

        // Mix in the buffered tail, four bytes at a time, then byte by byte.
        int idx = 0;
        final int limit = pos - 4;
        for (; idx <= limit; idx += 4) {
            hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4;
        }
        while (idx < pos) {
            hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1;
        }

        // Final avalanche.
        hash ^= hash >>> 15;
        hash *= PRIME2;
        hash ^= hash >>> 13;
        hash *= PRIME3;
        hash ^= hash >>> 16;
        return hash & 0xffffffffL;
    }

    /**
     * Reads four bytes as a little endian int.
     * @param buffer the array to read from
     * @param idx the offset of the least significant byte
     * @return the int assembled from {@code buffer[idx]} to {@code buffer[idx + 3]}
     */
    private static int getInt(final byte[] buffer, final int idx) {
        return (buffer[idx] & 0xff)
            | ((buffer[idx + 1] & 0xff) << 8)
            | ((buffer[idx + 2] & 0xff) << 16)
            | ((buffer[idx + 3] & 0xff) << 24);
    }

    /**
     * Seeds the four lanes.
     */
    private void initializeState() {
        state[0] = seed + PRIME1 + PRIME2;
        state[1] = seed + PRIME2;
        state[2] = seed;
        state[3] = seed - PRIME1;
    }

    /**
     * Mixes one block of {@link #BUF_SIZE} bytes into the four lanes.
     * @param b the array holding the block
     * @param offset the offset of the first byte of the block
     */
    private void process(final byte[] b, final int offset) {
        // local shadows for performance
        int s0 = state[0];
        int s1 = state[1];
        int s2 = state[2];
        int s3 = state[3];

        s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1;
        s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1;
        s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1;
        s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1;

        state[0] = s0;
        state[1] = s1;
        state[2] = s2;
        state[3] = s3;

        stateUpdated = true;
    }
}
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.codec.digest; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.net.URL; +import java.util.Arrays; +import java.util.Collection; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import org.junit.runner.RunWith; + +@RunWith(Parameterized.class) +public class XXHash32Test { + + private final File file; + private final String expectedChecksum; + + public XXHash32Test(String path, String c) throws IOException { + final URL url = XXHash32Test.class.getClassLoader().getResource(path); + if (url == null) { + throw new FileNotFoundException("couldn't find " + path); + } + URI uri = null; + try { + uri = url.toURI(); + } catch (final java.net.URISyntaxException ex) { + throw new IOException(ex); + } + file = new File(uri); + expectedChecksum = c; + } + + @Parameters + public static Collection<Object[]> factory() { + return Arrays.asList(new Object[][] { + // reference checksums created with xxh32sum + { "bla.tar", "fbb5c8d1" }, + { "bla.tar.xz", 
"4106a208" }, + }); + } + + @Test + public void verifyChecksum() throws IOException { + XXHash32 h = new XXHash32(); + FileInputStream s = new FileInputStream(file); + try { + byte[] b = toByteArray(s); + h.update(b, 0, b.length); + } finally { + s.close(); + } + Assert.assertEquals("checksum for " + file.getName(), expectedChecksum, Long.toHexString(h.getValue())); + } + + private static byte[] toByteArray(final InputStream input) throws IOException { + final ByteArrayOutputStream output = new ByteArrayOutputStream(); + copy(input, output, 10240); + return output.toByteArray(); + } + + private static long copy(final InputStream input, final OutputStream output, final int buffersize) throws IOException { + final byte[] buffer = new byte[buffersize]; + int n = 0; + long count=0; + while (-1 != (n = input.read(buffer))) { + output.write(buffer, 0, n); + count += n; + } + return count; + } +} Propchange: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/digest/XXHash32Test.java ------------------------------------------------------------------------------ svn:eol-style = native Added: commons/proper/codec/trunk/src/test/resources/bla.tar URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/resources/bla.tar?rev=1810226&view=auto ============================================================================== Binary file - no diff available. Propchange: commons/proper/codec/trunk/src/test/resources/bla.tar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: commons/proper/codec/trunk/src/test/resources/bla.tar.xz URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/resources/bla.tar.xz?rev=1810226&view=auto ============================================================================== Binary file - no diff available. 
Propchange: commons/proper/codec/trunk/src/test/resources/bla.tar.xz ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream