This is an automated email from the ASF dual-hosted git repository.

gortiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 9baf132185c Join improvements (#16830)
9baf132185c is described below

commit 9baf132185cdccea83acb6fef453530bd4edb4ef
Author: Gonzalo Ortiz Jaureguizar <[email protected]>
AuthorDate: Mon Sep 29 09:00:34 2025 +0200

    Join improvements (#16830)
---
 .../org/apache/pinot/perf/BenchmarkEquiJoin.java   |  1 -
 .../planner/partitioning/EmptyKeySelector.java     |  4 +-
 .../planner/partitioning/HashFunctionSelector.java | 89 +++++++++++++++-------
 .../partitioning/MultiColumnKeySelector.java       |  6 +-
 .../partitioning/SingleColumnKeySelector.java      |  6 +-
 .../partitioning/HashFunctionSelectorTest.java     | 54 +++++++------
 .../partitioning/KeySelectorHashFunctionTest.java  |  7 +-
 7 files changed, 107 insertions(+), 60 deletions(-)

diff --git 
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkEquiJoin.java 
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkEquiJoin.java
index 507e26aea41..89a79056273 100644
--- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkEquiJoin.java
+++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkEquiJoin.java
@@ -124,7 +124,6 @@ public class BenchmarkEquiJoin extends 
BaseClusterIntegrationTest {
         + "JOIN MyTable t2 "
         + "ON t1.intCol = t2.intCol "
         + "WHERE t2.intCol % 10 = 0";
-    ;
     return query(query);
   }
 
diff --git 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/EmptyKeySelector.java
 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/EmptyKeySelector.java
index 087d61ff11e..9a9f5a7c133 100644
--- 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/EmptyKeySelector.java
+++ 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/EmptyKeySelector.java
@@ -29,13 +29,13 @@ public class EmptyKeySelector implements 
KeySelector<Integer> {
   }
 
   private EmptyKeySelector(String hashFunction) {
-    _hashFunction = hashFunction;
+    _hashFunction = hashFunction.toLowerCase();
   }
 
   public static final EmptyKeySelector INSTANCE = new EmptyKeySelector();
 
   public static EmptyKeySelector getInstance(String hashFunction) {
-    if (KeySelector.DEFAULT_HASH_ALGORITHM.equals(hashFunction)) {
+    if (KeySelector.DEFAULT_HASH_ALGORITHM.equalsIgnoreCase(hashFunction)) {
       return INSTANCE;
     }
     return new EmptyKeySelector(hashFunction);
diff --git 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelector.java
 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelector.java
index 6ab6ee7ccde..49fce607c44 100644
--- 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelector.java
+++ 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelector.java
@@ -34,47 +34,80 @@ public class HashFunctionSelector {
   private HashFunctionSelector() {
   }
 
+  public interface SvHasher {
+    int hash(Object value);
+  }
+
+  public interface MvHasher {
+
+    /**
+     * Computes a hash code for multiple values based on specified key IDs 
using the specified hash function.
+     * This is useful for partitioning where only certain keys are relevant.
+     * @param values The array of values to hash.
+     * @param keyIds The array of key IDs indicating which values to include 
in the hash computation.
+     * @return The computed hash code.
+     */
+    int hash(Object[] values, int[] keyIds);
+  }
+
   /**
-   * Computes a hash code for a single value using the specified hash function.
-   * @param value The value to hash.
+   * Returns a hasher function based on the specified hash function.
    * @param hashFunction The hash function to use (e.g., "murmur", "murmur3", 
"cityhash", "absHashCode").
    * @return The computed hash code.
    */
-  public static int computeHash(Object value, String hashFunction) {
-    if (value == null) {
-      return 0;
-    }
-
+  public static SvHasher getSvHasher(String hashFunction) {
     switch (hashFunction.toLowerCase()) {
-      case MURMUR2: return murmur2(value);
-      case MURMUR3: return murmur3(value);
-      // hashCode and absHashCode are treated the same for single hash.
+      case MURMUR2:
+        return value -> {
+          if (value == null) {
+            return 0;
+          }
+          return HashFunctionSelector.murmur2(value);
+        };
+      case MURMUR3:
+        return value -> {
+          if (value == null) {
+            return 0;
+          }
+          return HashFunctionSelector.murmur3(value);
+        };
       case HASH_CODE:
       // Default hash is absHashCode.
-      default: return absHashCode(value);
+      default:
+        return value -> {
+          if (value == null) {
+            return 0;
+          }
+          return HashFunctionSelector.absHashCode(value);
+        };
     }
   }
 
-  /**
-   * Computes a hash code for multiple values based on specified key IDs using 
the specified hash function.
-   * This is useful for partitioning where only certain keys are relevant.
-   * @param values The array of values to hash.
-   * @param keyIds The array of key IDs indicating which values to include in 
the hash computation.
-   * @param hashFunction The hash function to use (e.g., "murmur2", "murmur3", 
"cityhash", "absHashCode").
-   * @return The computed hash code.
-   */
-  public static int computeMultiHash(Object[] values, int[] keyIds, String 
hashFunction) {
-    if (values == null || values.length == 0) {
-      return 0;
-    }
-
+  public static MvHasher getMvHasher(String hashFunction) {
     switch (hashFunction.toLowerCase()) {
-      case MURMUR2: return murmur2(values, keyIds);
-      case MURMUR3: return murmur3(values, keyIds);
-      // hashCode and absHashCode are treated the same for multi hash.
+      case MURMUR2:
+        return (values, keyIds) -> {
+          if (values == null || values.length == 0) {
+            return 0;
+          }
+          return murmur2(values, keyIds);
+        };
+      case MURMUR3:
+        return (values, keyIds) -> {
+          if (values == null || values.length == 0) {
+            return 0;
+          }
+          return murmur3(values, keyIds);
+        };
       case HASH_CODE:
         // We should hashCode instead of absHashCode for multi hash to 
maintain consistency with legacy behavior.
-      default: return hashCode(values, keyIds);
+      default:
+        return (values, keyIds) -> {
+          if (values == null || values.length == 0) {
+            return 0;
+          }
+          return hashCode(values, keyIds);
+        };
     }
   }
 
diff --git 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/MultiColumnKeySelector.java
 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/MultiColumnKeySelector.java
index 51fe01668f3..2cf175bb966 100644
--- 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/MultiColumnKeySelector.java
+++ 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/MultiColumnKeySelector.java
@@ -24,6 +24,7 @@ import org.apache.pinot.core.data.table.Key;
 public class MultiColumnKeySelector implements KeySelector<Key> {
   private final int[] _keyIds;
   private final String _hashFunction;
+  private final HashFunctionSelector.MvHasher _hasher;
 
   public MultiColumnKeySelector(int[] keyIds) {
     this(keyIds, KeySelector.DEFAULT_HASH_ALGORITHM);
@@ -31,7 +32,8 @@ public class MultiColumnKeySelector implements 
KeySelector<Key> {
 
   public MultiColumnKeySelector(int[] keyIds, String hashFunction) {
     _keyIds = keyIds;
-    _hashFunction = hashFunction;
+    _hashFunction = hashFunction.toLowerCase();
+    _hasher = HashFunctionSelector.getMvHasher(_hashFunction);
   }
 
   @Override
@@ -63,7 +65,7 @@ public class MultiColumnKeySelector implements 
KeySelector<Key> {
     //
     // TODO: consider better hashing algorithms than hashCode sum, such as 
XOR'ing
     // return a positive number because this is used directly to modulo-index
-    return HashFunctionSelector.computeMultiHash(input, _keyIds, 
_hashFunction);
+    return _hasher.hash(input, _keyIds);
   }
 
   @Override
diff --git 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/SingleColumnKeySelector.java
 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/SingleColumnKeySelector.java
index 3d19523941c..c3cdc892baf 100644
--- 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/SingleColumnKeySelector.java
+++ 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/partitioning/SingleColumnKeySelector.java
@@ -24,6 +24,7 @@ import javax.annotation.Nullable;
 public class SingleColumnKeySelector implements KeySelector<Object> {
   private final int _keyId;
   private final String _hashFunction;
+  private final HashFunctionSelector.SvHasher _hasher;
 
   public SingleColumnKeySelector(int keyId) {
     this(keyId, KeySelector.DEFAULT_HASH_ALGORITHM);
@@ -31,7 +32,8 @@ public class SingleColumnKeySelector implements 
KeySelector<Object> {
 
   public SingleColumnKeySelector(int keyId, String hashFunction) {
     _keyId = keyId;
-    _hashFunction = hashFunction;
+    _hashFunction = hashFunction.toLowerCase();
+    _hasher = HashFunctionSelector.getSvHasher(_hashFunction);
   }
 
   @Nullable
@@ -43,7 +45,7 @@ public class SingleColumnKeySelector implements 
KeySelector<Object> {
   @Override
   public int computeHash(Object[] input) {
     Object key = input[_keyId];
-    return HashFunctionSelector.computeHash(key, _hashFunction);
+    return _hasher.hash(key);
   }
 
   @Override
diff --git 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelectorTest.java
 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelectorTest.java
index d6d6a2209cf..c67c337b11a 100644
--- 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelectorTest.java
+++ 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/HashFunctionSelectorTest.java
@@ -27,11 +27,16 @@ import org.testng.annotations.Test;
  */
 public class HashFunctionSelectorTest {
 
+  private final HashFunctionSelector.SvHasher _abshashcode = 
HashFunctionSelector.getSvHasher("abshashcode");
+  private final HashFunctionSelector.SvHasher _murmur = 
HashFunctionSelector.getSvHasher("murmur");
+  private final HashFunctionSelector.SvHasher _murmur2 = 
HashFunctionSelector.getSvHasher("murmur2");
+  private final HashFunctionSelector.SvHasher _murmur3 = 
HashFunctionSelector.getSvHasher("murmur3");
+
   @Test
   public void testAbsHashCode() {
     String value = "test";
-    int hash1 = HashFunctionSelector.computeHash(value, "abshashcode");
-    int hash2 = HashFunctionSelector.computeHash(value, "abshashcode");
+    int hash1 = _abshashcode.hash(value);
+    int hash2 = _abshashcode.hash(value);
 
     // Same input should produce same hash
     Assert.assertEquals(hash1, hash2);
@@ -43,8 +48,8 @@ public class HashFunctionSelectorTest {
   @Test
   public void testMurmur2() {
     String value = "test";
-    int hash1 = HashFunctionSelector.computeHash(value, "murmur");
-    int hash2 = HashFunctionSelector.computeHash(value, "murmur");
+    int hash1 = _murmur.hash(value);
+    int hash2 = _murmur.hash(value);
 
     // Same input should produce same hash
     Assert.assertEquals(hash1, hash2);
@@ -53,15 +58,16 @@ public class HashFunctionSelectorTest {
     Assert.assertTrue(hash1 >= 0);
 
     // Should be different from absHashCode
-    int absHash = HashFunctionSelector.computeHash(value, "abshashcode");
+    int absHash = _abshashcode.hash(value);
     Assert.assertNotEquals(hash1, absHash);
   }
 
   @Test
   public void testMurmur3() {
     String value = "test";
-    int hash1 = HashFunctionSelector.computeHash(value, "murmur3");
-    int hash2 = HashFunctionSelector.computeHash(value, "murmur3");
+
+    int hash1 = _murmur3.hash(value);
+    int hash2 = _murmur3.hash(value);
 
     // Same input should produce same hash
     Assert.assertEquals(hash1, hash2);
@@ -70,8 +76,8 @@ public class HashFunctionSelectorTest {
     Assert.assertTrue(hash1 >= 0);
 
     // Should be different from other hash functions
-    int absHash = HashFunctionSelector.computeHash(value, "abshashcode");
-    int murmur2Hash = HashFunctionSelector.computeHash(value, "murmur2");
+    int absHash = _abshashcode.hash(value);
+    int murmur2Hash = _murmur2.hash(value);
     Assert.assertNotEquals(hash1, absHash);
     Assert.assertNotEquals(hash1, murmur2Hash);
   }
@@ -79,8 +85,9 @@ public class HashFunctionSelectorTest {
   @Test
   public void testHashCode() {
     String value = "test";
-    int hash1 = HashFunctionSelector.computeHash(value, "hashcode");
-    int hash2 = HashFunctionSelector.computeHash(value, "hashcode");
+    HashFunctionSelector.SvHasher svHasher = 
HashFunctionSelector.getSvHasher("hashcode");
+    int hash1 = svHasher.hash(value);
+    int hash2 = svHasher.hash(value);
 
     // Same input should produce same hash
     Assert.assertEquals(hash1, hash2);
@@ -89,9 +96,9 @@ public class HashFunctionSelectorTest {
     Assert.assertTrue(hash1 >= 0);
 
     // Should be different from murmur and murmur3 but same as absHashCode
-    int absHash = HashFunctionSelector.computeHash(value, "abshashcode");
-    int murmur2Hash = HashFunctionSelector.computeHash(value, "murmur");
-    int murmur3Hash = HashFunctionSelector.computeHash(value, "murmur3");
+    int absHash = _abshashcode.hash(value);
+    int murmur2Hash = _murmur.hash(value);
+    int murmur3Hash = _murmur3.hash(value);
     Assert.assertEquals(hash1, absHash);
     Assert.assertNotEquals(hash1, murmur2Hash);
     Assert.assertNotEquals(hash1, murmur3Hash);
@@ -100,26 +107,29 @@ public class HashFunctionSelectorTest {
   @Test
   public void testNullValue() {
     // Null values should return 0 for all hash functions
-    Assert.assertEquals(HashFunctionSelector.computeHash(null, "abshashcode"), 
0);
-    Assert.assertEquals(HashFunctionSelector.computeHash(null, "murmur"), 0);
-    Assert.assertEquals(HashFunctionSelector.computeHash(null, "murmur3"), 0);
-    Assert.assertEquals(HashFunctionSelector.computeHash(null, "cityhash"), 0);
+    Assert.assertEquals(_abshashcode.hash(null), 0);
+    Assert.assertEquals(_murmur.hash(null), 0);
+    Assert.assertEquals(_murmur3.hash(null), 0);
+    HashFunctionSelector.SvHasher cityhash = 
HashFunctionSelector.getSvHasher("cityhash");
+    Assert.assertEquals(cityhash.hash(null), 0);
   }
 
   @Test
   public void testUnknownHashFunction() {
     String value = "test";
     // Unknown hash function should default to absHashCode
-    int hash = HashFunctionSelector.computeHash(value, "unknown");
-    int expectedHash = HashFunctionSelector.computeHash(value, "abshashcode");
+    HashFunctionSelector.SvHasher unknown = 
HashFunctionSelector.getSvHasher("unknown");
+    int hash = unknown.hash(value);
+    int expectedHash = _abshashcode.hash(value);
     Assert.assertEquals(hash, expectedHash);
   }
 
   @Test
   public void testCaseInsensitive() {
     String value = "test";
-    int hash1 = HashFunctionSelector.computeHash(value, "MURMUR");
-    int hash2 = HashFunctionSelector.computeHash(value, "murmur");
+    HashFunctionSelector.SvHasher upperCase = 
HashFunctionSelector.getSvHasher("MURMUR");
+    int hash1 = upperCase.hash(value);
+    int hash2 = _murmur.hash(value);
     Assert.assertEquals(hash1, hash2);
   }
 }
diff --git 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/KeySelectorHashFunctionTest.java
 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/KeySelectorHashFunctionTest.java
index 83a1eed38ff..65e822699a2 100644
--- 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/KeySelectorHashFunctionTest.java
+++ 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/partitioning/KeySelectorHashFunctionTest.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.query.planner.partitioning;
 
+import java.util.Locale;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
@@ -96,14 +97,14 @@ public class KeySelectorHashFunctionTest {
   public void testKeySelectorFactoryWithDefaultHashFunction() {
     // Test single column
     KeySelector<?> singleSelector = 
KeySelectorFactory.getKeySelector(java.util.List.of(0));
-    Assert.assertEquals(singleSelector.hashAlgorithm(), 
KeySelector.DEFAULT_HASH_ALGORITHM);
+    Assert.assertEquals(singleSelector.hashAlgorithm(), 
KeySelector.DEFAULT_HASH_ALGORITHM.toLowerCase(Locale.US));
 
     // Test multi column
     KeySelector<?> multiSelector = 
KeySelectorFactory.getKeySelector(java.util.List.of(0, 1));
-    Assert.assertEquals(multiSelector.hashAlgorithm(), 
KeySelector.DEFAULT_HASH_ALGORITHM);
+    Assert.assertEquals(multiSelector.hashAlgorithm(), 
KeySelector.DEFAULT_HASH_ALGORITHM.toLowerCase(Locale.US));
 
     // Test empty
     KeySelector<?> emptySelector = 
KeySelectorFactory.getKeySelector(java.util.List.of());
-    Assert.assertEquals(emptySelector.hashAlgorithm(), 
KeySelector.DEFAULT_HASH_ALGORITHM);
+    Assert.assertEquals(emptySelector.hashAlgorithm(), 
KeySelector.DEFAULT_HASH_ALGORITHM.toLowerCase(Locale.US));
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to