This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new c39cf40fd5c [Upsert] add xxhash for PrimaryKey compression (#17253)
c39cf40fd5c is described below
commit c39cf40fd5c41bf84b55efc26920b6d723d02f0c
Author: Qiaochu Liu <[email protected]>
AuthorDate: Sun Nov 23 15:42:38 2025 -0800
[Upsert] add xxhash for PrimaryKey compression (#17253)
* [Upsert] add xxhash for PrimaryKey compression
* add support for xxh_128
* fix lint
---
.../pinot/segment/local/utils/HashUtils.java | 30 ++++++++++++++++++++++
.../pinot/segment/local/utils/HashUtilsTest.java | 4 +++
.../pinot/spi/config/table/HashFunction.java | 2 +-
3 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/HashUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/HashUtils.java
index 6e2bcf6e9ba..5070bbf9270 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/HashUtils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/HashUtils.java
@@ -18,10 +18,13 @@
*/
package org.apache.pinot.segment.local.utils;
+import com.dynatrace.hash4j.hashing.HashValue128;
+import com.dynatrace.hash4j.hashing.Hasher128;
import com.google.common.hash.Hashing;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.UUID;
+import net.jpountz.xxhash.XXHashFactory;
import org.apache.pinot.spi.config.table.HashFunction;
import org.apache.pinot.spi.data.readers.PrimaryKey;
import org.apache.pinot.spi.utils.ByteArray;
@@ -63,6 +66,29 @@ public class HashUtils {
return result;
}
+ /** Compute 64-bit xxHash (XXH64) with seed=0, returned as big-endian 8-byte array. */
+ public static byte[] hashXXHash(byte[] bytes) {
+ XXHashFactory xxhFactory = XXHashFactory.fastestInstance();
+ long hash64 = xxhFactory.hash64().hash(bytes, 0, bytes.length, 0L); // whole array (offset 0, full length), seed 0L
+ ByteBuffer buf = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN); // 8 bytes = one long
+ buf.putLong(hash64);
+ return buf.array(); // backing array now holds the 8 hash bytes
+ }
+
+ /**
+ * Compute xxh128 using hash4j (XXH3-128). Returns a 16-byte array (big-endian order for each 64-bit half).
+ */
+ public static byte[] hashXXH128(byte[] bytes) {
+ Hasher128 hasher = com.dynatrace.hash4j.hashing.Hashing.xxh3_128(); // fully qualified: simple name Hashing is the Guava import
+ HashValue128 hashValue128 = hasher.hashBytesTo128Bits(bytes);
+
+ // Encode as big-endian 16 bytes: most-significant 64 bits first, then least-significant 64 bits
+ ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.BIG_ENDIAN);
+ buf.putLong(hashValue128.getMostSignificantBits());
+ buf.putLong(hashValue128.getLeastSignificantBits());
+ return buf.array();
+ }
+
public static Object hashPrimaryKey(PrimaryKey primaryKey, HashFunction hashFunction) {
switch (hashFunction) {
case NONE:
@@ -73,6 +99,10 @@ public class HashUtils {
return new ByteArray(HashUtils.hashMurmur3(primaryKey.asBytes()));
case UUID:
return new ByteArray(HashUtils.hashUUID(primaryKey));
+ case XXHASH:
+ return new ByteArray(HashUtils.hashXXHash(primaryKey.asBytes()));
+ case XXH128:
+ return new ByteArray(HashUtils.hashXXH128(primaryKey.asBytes()));
default:
throw new IllegalArgumentException(String.format("Unrecognized hash function %s", hashFunction));
}
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/HashUtilsTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/HashUtilsTest.java
index c0f67ec1703..9f4649b162a 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/HashUtilsTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/HashUtilsTest.java
@@ -33,6 +33,10 @@ public class HashUtilsTest {
"5eb63bbbe01eeed093cb22bb8f5acdc3");
assertEquals(BytesUtils.toHexString(HashUtils.hashMurmur3("hello world".getBytes())),
"0e617feb46603f53b163eb607d4697ab");
+ assertEquals(BytesUtils.toHexString(HashUtils.hashXXHash("hello world".getBytes())),
+ "45ab6734b21e6968");
+ assertEquals(BytesUtils.toHexString(HashUtils.hashXXH128("hello world".getBytes())),
+ "df8d09e93f874900a99b8775cc15b6c7");
}
@Test
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/HashFunction.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/HashFunction.java
index effd15ff9b6..5185b92119d 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/HashFunction.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/HashFunction.java
@@ -19,5 +19,5 @@
package org.apache.pinot.spi.config.table;
public enum HashFunction {
- NONE, MD5, MURMUR3, UUID
+ NONE, MD5, MURMUR3, UUID, XXHASH, XXH128
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]