This is an automated email from the ASF dual-hosted git repository.
gortiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 9baf132185c Join improvements (#16830)
9baf132185c is described below
commit 9baf132185cdccea83acb6fef453530bd4edb4ef
Author: Gonzalo Ortiz Jaureguizar <[email protected]>
AuthorDate: Mon Sep 29 09:00:34 2025 +0200
Join improvements (#16830)
---
.../org/apache/pinot/perf/BenchmarkEquiJoin.java | 1 -
.../planner/partitioning/EmptyKeySelector.java | 4 +-
.../planner/partitioning/HashFunctionSelector.java | 89 +++++++++++++++-------
.../partitioning/MultiColumnKeySelector.java | 6 +-
.../partitioning/SingleColumnKeySelector.java | 6 +-
.../partitioning/HashFunctionSelectorTest.java | 54 +++++++------
.../partitioning/KeySelectorHashFunctionTest.java | 7 +-
7 files changed, 107 insertions(+), 60 deletions(-)
diff --git
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkEquiJoin.java
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkEquiJoin.java
index 507e26aea41..89a79056273 100644
--- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkEquiJoin.java
+++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkEquiJoin.java
@@ -124,7 +124,6 @@ public class BenchmarkEquiJoin extends
BaseClusterIntegrationTest {
+ "JOIN MyTable t2 "
+ "ON t1.intCol = t2.intCol "
+ "WHERE t2.intCol % 10 = 0";
- ;
return query(query);
}
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/EmptyKeySelector.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/EmptyKeySelector.java
index 087d61ff11e..9a9f5a7c133 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/EmptyKeySelector.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/EmptyKeySelector.java
@@ -29,13 +29,13 @@ public class EmptyKeySelector implements
KeySelector<Integer> {
}
private EmptyKeySelector(String hashFunction) {
- _hashFunction = hashFunction;
+ _hashFunction = hashFunction.toLowerCase();
}
public static final EmptyKeySelector INSTANCE = new EmptyKeySelector();
public static EmptyKeySelector getInstance(String hashFunction) {
- if (KeySelector.DEFAULT_HASH_ALGORITHM.equals(hashFunction)) {
+ if (KeySelector.DEFAULT_HASH_ALGORITHM.equalsIgnoreCase(hashFunction)) {
return INSTANCE;
}
return new EmptyKeySelector(hashFunction);
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelector.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelector.java
index 6ab6ee7ccde..49fce607c44 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelector.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelector.java
@@ -34,47 +34,80 @@ public class HashFunctionSelector {
private HashFunctionSelector() {
}
+ public interface SvHasher {
+ int hash(Object value);
+ }
+
+ public interface MvHasher {
+
+ /**
+ * Computes a hash code for multiple values based on specified key IDs
using the specified hash function.
+ * This is useful for partitioning where only certain keys are relevant.
+ * @param values The array of values to hash.
+ * @param keyIds The array of key IDs indicating which values to include
in the hash computation.
+ * @return The computed hash code.
+ */
+ int hash(Object[] values, int[] keyIds);
+ }
+
/**
- * Computes a hash code for a single value using the specified hash function.
- * @param value The value to hash.
+ * Returns a hasher function based on the specified hash function.
* @param hashFunction The hash function to use (e.g., "murmur", "murmur3",
"cityhash", "absHashCode").
* @return The computed hash code.
*/
- public static int computeHash(Object value, String hashFunction) {
- if (value == null) {
- return 0;
- }
-
+ public static SvHasher getSvHasher(String hashFunction) {
switch (hashFunction.toLowerCase()) {
- case MURMUR2: return murmur2(value);
- case MURMUR3: return murmur3(value);
- // hashCode and absHashCode are treated the same for single hash.
+ case MURMUR2:
+ return value -> {
+ if (value == null) {
+ return 0;
+ }
+ return HashFunctionSelector.murmur2(value);
+ };
+ case MURMUR3:
+ return value -> {
+ if (value == null) {
+ return 0;
+ }
+ return HashFunctionSelector.murmur3(value);
+ };
case HASH_CODE:
// Default hash is absHashCode.
- default: return absHashCode(value);
+ default:
+ return value -> {
+ if (value == null) {
+ return 0;
+ }
+ return HashFunctionSelector.absHashCode(value);
+ };
}
}
- /**
- * Computes a hash code for multiple values based on specified key IDs using
the specified hash function.
- * This is useful for partitioning where only certain keys are relevant.
- * @param values The array of values to hash.
- * @param keyIds The array of key IDs indicating which values to include in
the hash computation.
- * @param hashFunction The hash function to use (e.g., "murmur2", "murmur3",
"cityhash", "absHashCode").
- * @return The computed hash code.
- */
- public static int computeMultiHash(Object[] values, int[] keyIds, String
hashFunction) {
- if (values == null || values.length == 0) {
- return 0;
- }
-
+ public static MvHasher getMvHasher(String hashFunction) {
switch (hashFunction.toLowerCase()) {
- case MURMUR2: return murmur2(values, keyIds);
- case MURMUR3: return murmur3(values, keyIds);
- // hashCode and absHashCode are treated the same for multi hash.
+ case MURMUR2:
+ return (values, keyIds) -> {
+ if (values == null || values.length == 0) {
+ return 0;
+ }
+ return murmur2(values, keyIds);
+ };
+ case MURMUR3:
+ return (values, keyIds) -> {
+ if (values == null || values.length == 0) {
+ return 0;
+ }
+ return murmur3(values, keyIds);
+ };
case HASH_CODE:
// We should hashCode instead of absHashCode for multi hash to
maintain consistency with legacy behavior.
- default: return hashCode(values, keyIds);
+ default:
+ return (values, keyIds) -> {
+ if (values == null || values.length == 0) {
+ return 0;
+ }
+ return hashCode(values, keyIds);
+ };
}
}
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/MultiColumnKeySelector.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/MultiColumnKeySelector.java
index 51fe01668f3..2cf175bb966 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/MultiColumnKeySelector.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/MultiColumnKeySelector.java
@@ -24,6 +24,7 @@ import org.apache.pinot.core.data.table.Key;
public class MultiColumnKeySelector implements KeySelector<Key> {
private final int[] _keyIds;
private final String _hashFunction;
+ private final HashFunctionSelector.MvHasher _hasher;
public MultiColumnKeySelector(int[] keyIds) {
this(keyIds, KeySelector.DEFAULT_HASH_ALGORITHM);
@@ -31,7 +32,8 @@ public class MultiColumnKeySelector implements
KeySelector<Key> {
public MultiColumnKeySelector(int[] keyIds, String hashFunction) {
_keyIds = keyIds;
- _hashFunction = hashFunction;
+ _hashFunction = hashFunction.toLowerCase();
+ _hasher = HashFunctionSelector.getMvHasher(_hashFunction);
}
@Override
@@ -63,7 +65,7 @@ public class MultiColumnKeySelector implements
KeySelector<Key> {
//
// TODO: consider better hashing algorithms than hashCode sum, such as
XOR'ing
// return a positive number because this is used directly to modulo-index
- return HashFunctionSelector.computeMultiHash(input, _keyIds,
_hashFunction);
+ return _hasher.hash(input, _keyIds);
}
@Override
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/SingleColumnKeySelector.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/SingleColumnKeySelector.java
index 3d19523941c..c3cdc892baf 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/SingleColumnKeySelector.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/SingleColumnKeySelector.java
@@ -24,6 +24,7 @@ import javax.annotation.Nullable;
public class SingleColumnKeySelector implements KeySelector<Object> {
private final int _keyId;
private final String _hashFunction;
+ private final HashFunctionSelector.SvHasher _hasher;
public SingleColumnKeySelector(int keyId) {
this(keyId, KeySelector.DEFAULT_HASH_ALGORITHM);
@@ -31,7 +32,8 @@ public class SingleColumnKeySelector implements
KeySelector<Object> {
public SingleColumnKeySelector(int keyId, String hashFunction) {
_keyId = keyId;
- _hashFunction = hashFunction;
+ _hashFunction = hashFunction.toLowerCase();
+ _hasher = HashFunctionSelector.getSvHasher(_hashFunction);
}
@Nullable
@@ -43,7 +45,7 @@ public class SingleColumnKeySelector implements
KeySelector<Object> {
@Override
public int computeHash(Object[] input) {
Object key = input[_keyId];
- return HashFunctionSelector.computeHash(key, _hashFunction);
+ return _hasher.hash(key);
}
@Override
diff --git
a/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelectorTest.java
b/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelectorTest.java
index d6d6a2209cf..c67c337b11a 100644
---
a/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelectorTest.java
+++
b/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelectorTest.java
@@ -27,11 +27,16 @@ import org.testng.annotations.Test;
*/
public class HashFunctionSelectorTest {
+ private final HashFunctionSelector.SvHasher _abshashcode =
HashFunctionSelector.getSvHasher("abshashcode");
+ private final HashFunctionSelector.SvHasher _murmur =
HashFunctionSelector.getSvHasher("murmur");
+ private final HashFunctionSelector.SvHasher _murmur2 =
HashFunctionSelector.getSvHasher("murmur2");
+ private final HashFunctionSelector.SvHasher _murmur3 =
HashFunctionSelector.getSvHasher("murmur3");
+
@Test
public void testAbsHashCode() {
String value = "test";
- int hash1 = HashFunctionSelector.computeHash(value, "abshashcode");
- int hash2 = HashFunctionSelector.computeHash(value, "abshashcode");
+ int hash1 = _abshashcode.hash(value);
+ int hash2 = _abshashcode.hash(value);
// Same input should produce same hash
Assert.assertEquals(hash1, hash2);
@@ -43,8 +48,8 @@ public class HashFunctionSelectorTest {
@Test
public void testMurmur2() {
String value = "test";
- int hash1 = HashFunctionSelector.computeHash(value, "murmur");
- int hash2 = HashFunctionSelector.computeHash(value, "murmur");
+ int hash1 = _murmur.hash(value);
+ int hash2 = _murmur.hash(value);
// Same input should produce same hash
Assert.assertEquals(hash1, hash2);
@@ -53,15 +58,16 @@ public class HashFunctionSelectorTest {
Assert.assertTrue(hash1 >= 0);
// Should be different from absHashCode
- int absHash = HashFunctionSelector.computeHash(value, "abshashcode");
+ int absHash = _abshashcode.hash(value);
Assert.assertNotEquals(hash1, absHash);
}
@Test
public void testMurmur3() {
String value = "test";
- int hash1 = HashFunctionSelector.computeHash(value, "murmur3");
- int hash2 = HashFunctionSelector.computeHash(value, "murmur3");
+
+ int hash1 = _murmur3.hash(value);
+ int hash2 = _murmur3.hash(value);
// Same input should produce same hash
Assert.assertEquals(hash1, hash2);
@@ -70,8 +76,8 @@ public class HashFunctionSelectorTest {
Assert.assertTrue(hash1 >= 0);
// Should be different from other hash functions
- int absHash = HashFunctionSelector.computeHash(value, "abshashcode");
- int murmur2Hash = HashFunctionSelector.computeHash(value, "murmur2");
+ int absHash = _abshashcode.hash(value);
+ int murmur2Hash = _murmur2.hash(value);
Assert.assertNotEquals(hash1, absHash);
Assert.assertNotEquals(hash1, murmur2Hash);
}
@@ -79,8 +85,9 @@ public class HashFunctionSelectorTest {
@Test
public void testHashCode() {
String value = "test";
- int hash1 = HashFunctionSelector.computeHash(value, "hashcode");
- int hash2 = HashFunctionSelector.computeHash(value, "hashcode");
+ HashFunctionSelector.SvHasher svHasher =
HashFunctionSelector.getSvHasher("hashcode");
+ int hash1 = svHasher.hash(value);
+ int hash2 = svHasher.hash(value);
// Same input should produce same hash
Assert.assertEquals(hash1, hash2);
@@ -89,9 +96,9 @@ public class HashFunctionSelectorTest {
Assert.assertTrue(hash1 >= 0);
// Should be different from murmur and murmur3 but same as absHashCode
- int absHash = HashFunctionSelector.computeHash(value, "abshashcode");
- int murmur2Hash = HashFunctionSelector.computeHash(value, "murmur");
- int murmur3Hash = HashFunctionSelector.computeHash(value, "murmur3");
+ int absHash = _abshashcode.hash(value);
+ int murmur2Hash = _murmur.hash(value);
+ int murmur3Hash = _murmur3.hash(value);
Assert.assertEquals(hash1, absHash);
Assert.assertNotEquals(hash1, murmur2Hash);
Assert.assertNotEquals(hash1, murmur3Hash);
@@ -100,26 +107,29 @@ public class HashFunctionSelectorTest {
@Test
public void testNullValue() {
// Null values should return 0 for all hash functions
- Assert.assertEquals(HashFunctionSelector.computeHash(null, "abshashcode"),
0);
- Assert.assertEquals(HashFunctionSelector.computeHash(null, "murmur"), 0);
- Assert.assertEquals(HashFunctionSelector.computeHash(null, "murmur3"), 0);
- Assert.assertEquals(HashFunctionSelector.computeHash(null, "cityhash"), 0);
+ Assert.assertEquals(_abshashcode.hash(null), 0);
+ Assert.assertEquals(_murmur.hash(null), 0);
+ Assert.assertEquals(_murmur3.hash(null), 0);
+ HashFunctionSelector.SvHasher cityhash =
HashFunctionSelector.getSvHasher("cityhash");
+ Assert.assertEquals(cityhash.hash(null), 0);
}
@Test
public void testUnknownHashFunction() {
String value = "test";
// Unknown hash function should default to absHashCode
- int hash = HashFunctionSelector.computeHash(value, "unknown");
- int expectedHash = HashFunctionSelector.computeHash(value, "abshashcode");
+ HashFunctionSelector.SvHasher unknown =
HashFunctionSelector.getSvHasher("unknown");
+ int hash = unknown.hash(value);
+ int expectedHash = _abshashcode.hash(value);
Assert.assertEquals(hash, expectedHash);
}
@Test
public void testCaseInsensitive() {
String value = "test";
- int hash1 = HashFunctionSelector.computeHash(value, "MURMUR");
- int hash2 = HashFunctionSelector.computeHash(value, "murmur");
+ HashFunctionSelector.SvHasher upperCase =
HashFunctionSelector.getSvHasher("MURMUR");
+ int hash1 = upperCase.hash(value);
+ int hash2 = _murmur.hash(value);
Assert.assertEquals(hash1, hash2);
}
}
diff --git
a/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/KeySelectorHashFunctionTest.java
b/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/KeySelectorHashFunctionTest.java
index 83a1eed38ff..65e822699a2 100644
---
a/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/KeySelectorHashFunctionTest.java
+++
b/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/KeySelectorHashFunctionTest.java
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.query.planner.partitioning;
+import java.util.Locale;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -96,14 +97,14 @@ public class KeySelectorHashFunctionTest {
public void testKeySelectorFactoryWithDefaultHashFunction() {
// Test single column
KeySelector<?> singleSelector =
KeySelectorFactory.getKeySelector(java.util.List.of(0));
- Assert.assertEquals(singleSelector.hashAlgorithm(),
KeySelector.DEFAULT_HASH_ALGORITHM);
+ Assert.assertEquals(singleSelector.hashAlgorithm(),
KeySelector.DEFAULT_HASH_ALGORITHM.toLowerCase(Locale.US));
// Test multi column
KeySelector<?> multiSelector =
KeySelectorFactory.getKeySelector(java.util.List.of(0, 1));
- Assert.assertEquals(multiSelector.hashAlgorithm(),
KeySelector.DEFAULT_HASH_ALGORITHM);
+ Assert.assertEquals(multiSelector.hashAlgorithm(),
KeySelector.DEFAULT_HASH_ALGORITHM.toLowerCase(Locale.US));
// Test empty
KeySelector<?> emptySelector =
KeySelectorFactory.getKeySelector(java.util.List.of());
- Assert.assertEquals(emptySelector.hashAlgorithm(),
KeySelector.DEFAULT_HASH_ALGORITHM);
+ Assert.assertEquals(emptySelector.hashAlgorithm(),
KeySelector.DEFAULT_HASH_ALGORITHM.toLowerCase(Locale.US));
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]