shubhamvishu commented on code in PR #12868: URL: https://github.com/apache/lucene/pull/12868#discussion_r1756832910
########## lucene/core/src/java/org/apache/lucene/util/StringHelper.java: ########## @@ -209,6 +209,156 @@ public static int murmurhash3_x86_32(BytesRef bytes, int seed) { return murmurhash3_x86_32(bytes.bytes, bytes.offset, bytes.length, seed); } + /** + * Generates 128-bit hash from the byte array with the given offset, length and seed. + * + * <p>The code is adopted from Apache Commons (<a + * href="https://commons.apache.org/proper/commons-codec/jacoco/org.apache.commons.codec.digest/MurmurHash3.java.html">link</a>) + * + * @param data The input byte array + * @param offset The first element of array + * @param length The length of array + * @param seed The initial seed value + * @return The 128-bit hash (2 longs) + */ + public static long[] murmurhash3_x64_128( + final byte[] data, final int offset, final int length, final int seed) { + // Use an unsigned 32-bit integer as the seed + return murmurhash3_x64_128(data, offset, length, seed & 0xFFFFFFFFL); + } + + @SuppressWarnings("fallthrough") + private static long[] murmurhash3_x64_128( + final byte[] data, final int offset, final int length, final long seed) { + long h1 = seed; + long h2 = seed; + final int nblocks = length >> 4; + + // Constants for 128-bit variant + final long C1 = 0x87c37b91114253d5L; + final long C2 = 0x4cf5ad432745937fL; + final int R1 = 31; + final int R2 = 27; + final int R3 = 33; + final int M = 5; + final int N1 = 0x52dce729; + final int N2 = 0x38495ab5; + + // body + for (int i = 0; i < nblocks; i++) { + final int index = offset + (i << 4); + long k1 = (long) BitUtil.VH_LE_LONG.get(data, index); + long k2 = (long) BitUtil.VH_LE_LONG.get(data, index + 8); + + // mix functions for k1 + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + h1 ^= k1; + h1 = Long.rotateLeft(h1, R2); + h1 += h2; + h1 = h1 * M + N1; + + // mix functions for k2 + k2 *= C2; + k2 = Long.rotateLeft(k2, R3); + k2 *= C1; + h2 ^= k2; + h2 = Long.rotateLeft(h2, R1); + h2 += h1; + h2 = h2 * M + N2; + } + + // tail + long k1 = 0; + long k2 = 0; + final int index = offset + (nblocks << 4); + switch (offset + length - index) { Review Comment: Makes sense. I'll update it (Maybe we could do this in the [Apache commons impl](https://github.com/apache/commons-codec/blob/master/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java) also) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org