This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 20b557db44 Modify primitive lookup tables to accept null values (#15848) 20b557db44 is described below commit 20b557db4427c4919ec8ded4564c79050cb8852c Author: Gonzalo Ortiz Jaureguizar <gor...@users.noreply.github.com> AuthorDate: Fri May 23 01:41:59 2025 +0200 Modify primitive lookup tables to accept null values (#15848) --- .../query/runtime/operator/HashJoinOperator.java | 8 +- .../runtime/operator/join/DoubleLookupTable.java | 12 +-- .../runtime/operator/join/FloatLookupTable.java | 12 +-- .../runtime/operator/join/IntLookupTable.java | 21 ++--- .../runtime/operator/join/LongLookupTable.java | 12 +-- .../query/runtime/operator/join/LookupTable.java | 15 ++- .../runtime/operator/join/ObjectLookupTable.java | 10 +- .../operator/join/PrimitiveLookupTable.java | 103 +++++++++++++++++++++ .../src/test/resources/queries/NullHandling.json | 20 ++++ 9 files changed, 169 insertions(+), 44 deletions(-) diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/HashJoinOperator.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/HashJoinOperator.java index d871f7626e..5d4294546c 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/HashJoinOperator.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/HashJoinOperator.java @@ -260,15 +260,15 @@ public class HashJoinOperator extends BaseJoinOperator { protected List<Object[]> buildNonMatchRightRows() { List<Object[]> rows = new ArrayList<>(); if (_rightTable.isKeysUnique()) { - for (Map.Entry<Object, Object[]> entry : _rightTable.entrySet()) { - Object[] rightRow = entry.getValue(); + for (Map.Entry<Object, Object> entry : _rightTable.entrySet()) { + Object[] rightRow = (Object[]) entry.getValue(); if (!_matchedRightRows.containsKey(entry.getKey())) { rows.add(joinRow(null, rightRow)); } } } else { - for (Map.Entry<Object, 
ArrayList<Object[]>> entry : _rightTable.entrySet()) { - List<Object[]> rightRows = entry.getValue(); + for (Map.Entry<Object, Object> entry : _rightTable.entrySet()) { + List<Object[]> rightRows = ((List<Object[]>) entry.getValue()); BitSet matchedIndices = _matchedRightRows.get(entry.getKey()); if (matchedIndices == null) { for (Object[] rightRow : rightRows) { diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/DoubleLookupTable.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/DoubleLookupTable.java index 77c9266d39..3ca59141e2 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/DoubleLookupTable.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/DoubleLookupTable.java @@ -29,16 +29,16 @@ import javax.annotation.Nullable; * The {@code DoubleLookupTable} is a lookup table for double keys. */ @SuppressWarnings("unchecked") -public class DoubleLookupTable extends LookupTable { +public class DoubleLookupTable extends PrimitiveLookupTable { private final Double2ObjectOpenHashMap<Object> _lookupTable = new Double2ObjectOpenHashMap<>(INITIAL_CAPACITY); @Override - public void addRow(Object key, Object[] row) { + public void addRowNotNullKey(Object key, Object[] row) { _lookupTable.compute((double) key, (k, v) -> computeNewValue(row, v)); } @Override - public void finish() { + public void finishNotNullKey() { if (!_keysUnique) { for (Double2ObjectMap.Entry<Object> entry : _lookupTable.double2ObjectEntrySet()) { convertValueToList(entry); @@ -47,19 +47,19 @@ public class DoubleLookupTable extends LookupTable { } @Override - public boolean containsKey(Object key) { + public boolean containsNotNullKey(Object key) { return _lookupTable.containsKey((double) key); } @Nullable @Override - public Object lookup(Object key) { + public Object lookupNotNullKey(Object key) { return _lookupTable.get((double) key); } 
@SuppressWarnings("rawtypes") @Override - public Set<Map.Entry> entrySet() { + public Set<Map.Entry<Object, Object>> notNullKeyEntrySet() { return (Set) _lookupTable.double2ObjectEntrySet(); } } diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/FloatLookupTable.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/FloatLookupTable.java index 437b3f8547..ae7a2c106c 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/FloatLookupTable.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/FloatLookupTable.java @@ -29,16 +29,16 @@ import javax.annotation.Nullable; * The {@code FloatLookupTable} is a lookup table for float keys. */ @SuppressWarnings("unchecked") -public class FloatLookupTable extends LookupTable { +public class FloatLookupTable extends PrimitiveLookupTable { private final Float2ObjectOpenHashMap<Object> _lookupTable = new Float2ObjectOpenHashMap<>(INITIAL_CAPACITY); @Override - public void addRow(Object key, Object[] row) { + public void addRowNotNullKey(Object key, Object[] row) { _lookupTable.compute((float) key, (k, v) -> computeNewValue(row, v)); } @Override - public void finish() { + public void finishNotNullKey() { if (!_keysUnique) { for (Float2ObjectMap.Entry<Object> entry : _lookupTable.float2ObjectEntrySet()) { convertValueToList(entry); @@ -47,19 +47,19 @@ public class FloatLookupTable extends LookupTable { } @Override - public boolean containsKey(Object key) { + public boolean containsNotNullKey(Object key) { return _lookupTable.containsKey((float) key); } @Nullable @Override - public Object lookup(Object key) { + public Object lookupNotNullKey(Object key) { return _lookupTable.get((float) key); } @SuppressWarnings("rawtypes") @Override - public Set<Map.Entry> entrySet() { + public Set<Map.Entry<Object, Object>> notNullKeyEntrySet() { return (Set) _lookupTable.float2ObjectEntrySet(); } } diff 
--git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/IntLookupTable.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/IntLookupTable.java index 688192b6cc..fb6ef693a0 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/IntLookupTable.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/IntLookupTable.java @@ -22,44 +22,39 @@ import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; import java.util.Map; import java.util.Set; -import javax.annotation.Nullable; /** * The {@code IntLookupTable} is a lookup table for int keys. */ @SuppressWarnings("unchecked") -public class IntLookupTable extends LookupTable { +public class IntLookupTable extends PrimitiveLookupTable { private final Int2ObjectOpenHashMap<Object> _lookupTable = new Int2ObjectOpenHashMap<>(INITIAL_CAPACITY); @Override - public void addRow(Object key, Object[] row) { + protected void addRowNotNullKey(Object key, Object[] row) { _lookupTable.compute((int) key, (k, v) -> computeNewValue(row, v)); } @Override - public void finish() { - if (!_keysUnique) { - for (Int2ObjectMap.Entry<Object> entry : _lookupTable.int2ObjectEntrySet()) { - convertValueToList(entry); - } + protected void finishNotNullKey() { + for (Int2ObjectMap.Entry<Object> entry : _lookupTable.int2ObjectEntrySet()) { + convertValueToList(entry); } } @Override - public boolean containsKey(Object key) { + protected boolean containsNotNullKey(Object key) { return _lookupTable.containsKey((int) key); } - @Nullable @Override - public Object lookup(Object key) { + protected Object lookupNotNullKey(Object key) { return _lookupTable.get((int) key); } - @SuppressWarnings("rawtypes") @Override - public Set<Map.Entry> entrySet() { + protected Set<Map.Entry<Object, Object>> notNullKeyEntrySet() { return (Set) _lookupTable.int2ObjectEntrySet(); } } diff --git 
a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/LongLookupTable.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/LongLookupTable.java index 5e393f4647..44fc2ed61d 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/LongLookupTable.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/LongLookupTable.java @@ -29,16 +29,16 @@ import javax.annotation.Nullable; * The {@code LongLookupTable} is a lookup table for long keys. */ @SuppressWarnings("unchecked") -public class LongLookupTable extends LookupTable { +public class LongLookupTable extends PrimitiveLookupTable { private final Long2ObjectOpenHashMap<Object> _lookupTable = new Long2ObjectOpenHashMap<>(INITIAL_CAPACITY); @Override - public void addRow(Object key, Object[] row) { + public void addRowNotNullKey(Object key, Object[] row) { _lookupTable.compute((long) key, (k, v) -> computeNewValue(row, v)); } @Override - public void finish() { + public void finishNotNullKey() { if (!_keysUnique) { for (Long2ObjectMap.Entry<Object> entry : _lookupTable.long2ObjectEntrySet()) { convertValueToList(entry); @@ -47,19 +47,19 @@ public class LongLookupTable extends LookupTable { } @Override - public boolean containsKey(Object key) { + public boolean containsNotNullKey(Object key) { return _lookupTable.containsKey((long) key); } @Nullable @Override - public Object lookup(Object key) { + public Object lookupNotNullKey(Object key) { return _lookupTable.get((long) key); } @SuppressWarnings("rawtypes") @Override - public Set<Map.Entry> entrySet() { + public Set<Map.Entry<Object, Object>> notNullKeyEntrySet() { return (Set) _lookupTable.long2ObjectEntrySet(); } } diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/LookupTable.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/LookupTable.java index 
0b62092bbe..86f811eca5 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/LookupTable.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/LookupTable.java @@ -35,7 +35,7 @@ public abstract class LookupTable { /** * Adds a row to the lookup table. */ - public abstract void addRow(Object key, Object[] row); + public abstract void addRow(@Nullable Object key, Object[] row); @SuppressWarnings("unchecked") protected Object computeNewValue(Object[] row, @Nullable Object currentValue) { @@ -68,6 +68,13 @@ public abstract class LookupTable { } } + protected static Object convertValueToList(Object value) { + if (value instanceof Object[]) { + return Collections.singletonList(value); + } + return value; + } + /** * Returns {@code true} when all the keys added to the lookup table are unique. * When all keys are unique, the value of the lookup table is a single row ({@code Object[]}). When keys are not @@ -80,7 +87,7 @@ public abstract class LookupTable { /** * Returns {@code true} if the lookup table contains the given key. */ - public abstract boolean containsKey(Object key); + public abstract boolean containsKey(@Nullable Object key); /** * Returns the row/rows for the given key. When {@link #isKeysUnique} returns {@code true}, this method returns a @@ -88,7 +95,7 @@ public abstract class LookupTable { * ({@code List<Object[]>}). Returns {@code null} if the key does not exist in the lookup table. */ @Nullable - public abstract Object lookup(Object key); + public abstract Object lookup(@Nullable Object key); /** * Returns all the entries in the lookup table. When {@link #isKeysUnique} returns {@code true}, the value of the @@ -96,5 +103,5 @@ public abstract class LookupTable { * entries is a list of rows ({@code List<Object[]>}). 
*/ @SuppressWarnings("rawtypes") - public abstract Set<Map.Entry> entrySet(); + public abstract Set<Map.Entry<Object, Object>> entrySet(); } diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/ObjectLookupTable.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/ObjectLookupTable.java index f455b1a8c3..02b00dfd3f 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/ObjectLookupTable.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/ObjectLookupTable.java @@ -32,7 +32,7 @@ public class ObjectLookupTable extends LookupTable { private final Map<Object, Object> _lookupTable = Maps.newHashMapWithExpectedSize(INITIAL_CAPACITY); @Override - public void addRow(Object key, Object[] row) { + public void addRow(@Nullable Object key, Object[] row) { _lookupTable.compute(key, (k, v) -> computeNewValue(row, v)); } @@ -46,19 +46,19 @@ public class ObjectLookupTable extends LookupTable { } @Override - public boolean containsKey(Object key) { + public boolean containsKey(@Nullable Object key) { return _lookupTable.containsKey(key); } @Nullable @Override - public Object lookup(Object key) { + public Object lookup(@Nullable Object key) { return _lookupTable.get(key); } @SuppressWarnings("rawtypes") @Override - public Set<Map.Entry> entrySet() { - return (Set) _lookupTable.entrySet(); + public Set<Map.Entry<Object, Object>> entrySet() { + return _lookupTable.entrySet(); } } diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/PrimitiveLookupTable.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/PrimitiveLookupTable.java new file mode 100644 index 0000000000..af5c01e063 --- /dev/null +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/join/PrimitiveLookupTable.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.query.runtime.operator.join; + +import com.google.common.collect.Sets; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; + + +public abstract class PrimitiveLookupTable extends LookupTable { + + private Object _valueForNullKey; + + @Override + public void addRow(@Nullable Object key, Object[] row) { + if (key == null) { + _valueForNullKey = computeNewValue(row, _valueForNullKey); + return; + } + addRowNotNullKey(key, row); + } + + @Override + public void finish() { + if (!_keysUnique) { + if (_valueForNullKey != null) { + _valueForNullKey = convertValueToList(_valueForNullKey); + } + finishNotNullKey(); + } + } + + protected abstract void finishNotNullKey(); + + protected abstract void addRowNotNullKey(Object key, Object[] row); + + @Override + public boolean containsKey(@Nullable Object key) { + if (key == null) { + return _valueForNullKey != null; + } + return containsNotNullKey(key); + } + + protected abstract boolean containsNotNullKey(Object key); + + @Nullable + @Override + public Object lookup(@Nullable Object key) { + if (key == null) { + return _valueForNullKey; + } + return lookupNotNullKey(key); + } + + protected abstract Object 
lookupNotNullKey(Object key); + + @SuppressWarnings("rawtypes") + @Override + public Set<Map.Entry<Object, Object>> entrySet() { + Set<Map.Entry<Object, Object>> notNullSet = notNullKeyEntrySet(); + if (_valueForNullKey != null) { + Set<Map.Entry<Object, Object>> nullEntry = Set.of(new Map.Entry<>() { + @Override + public Object getKey() { + return null; + } + + @Override + public Object getValue() { + return _valueForNullKey; + } + + @Override + public Object setValue(Object value) { + throw new UnsupportedOperationException(); + } + }); + return Sets.union(notNullSet, nullEntry); + } else { + return notNullSet; + } + } + + protected abstract Set<Map.Entry<Object, Object>> notNullKeyEntrySet(); +} diff --git a/pinot-query-runtime/src/test/resources/queries/NullHandling.json b/pinot-query-runtime/src/test/resources/queries/NullHandling.json index 95e6b6c0c5..ed1375a0ac 100644 --- a/pinot-query-runtime/src/test/resources/queries/NullHandling.json +++ b/pinot-query-runtime/src/test/resources/queries/NullHandling.json @@ -64,6 +64,26 @@ "description": "LEFT JOIN and GROUP BY with AGGREGATE AND SORT", "sql": "SELECT {tbl1}.strCol2, COUNT({tbl2}.intCol1), MIN({tbl2}.intCol1) AS minCol, MAX({tbl2}.doubleCol1) AS maxCol, SUM({tbl2}.doubleCol1) FROM {tbl1} LEFT OUTER JOIN {tbl2} ON {tbl1}.strCol1 = {tbl2}.strCol1 GROUP BY {tbl1}.strCol2 ORDER BY minCol DESC NULLS LAST, maxCol ASC NULLS LAST", "keepOutputRowOrder": true + }, + { + "description": "Joining with nullable int column", + "sql": "SET enableNullHandling = TRUE; WITH tableWithNull AS (SELECT cast(intCol1 as INT) col1, CASE WHEN intCol1 > 0 THEN NULL ELSE cast(intCol1 as INT) END AS nullableCol1 FROM {tbl1}) SELECT t1.col1 FROM tableWithNull AS t1 JOIN tableWithNull AS t2 ON t1.col1 = t2.nullableCol1", + "h2Sql": " WITH tableWithNull AS (SELECT cast(intCol1 as INT) col1, CASE WHEN intCol1 > 0 THEN NULL ELSE cast(intCol1 as INT) END AS nullableCol1 FROM {tbl1}) SELECT t1.col1 FROM tableWithNull AS t1 JOIN 
tableWithNull AS t2 ON t1.col1 = t2.nullableCol1" + }, + { + "description": "Joining with nullable long column", + "sql": "SET enableNullHandling = TRUE; WITH tableWithNull AS (SELECT cast(intCol1 as LONG) col1, CASE WHEN intCol1 > 0 THEN NULL ELSE cast(intCol1 as LONG) END AS nullableCol1 FROM {tbl1}) SELECT t1.col1 FROM tableWithNull AS t1 JOIN tableWithNull AS t2 ON t1.col1 = t2.nullableCol1", + "h2Sql": " WITH tableWithNull AS (SELECT cast(intCol1 as LONG) col1, CASE WHEN intCol1 > 0 THEN NULL ELSE cast(intCol1 as LONG) END AS nullableCol1 FROM {tbl1}) SELECT t1.col1 FROM tableWithNull AS t1 JOIN tableWithNull AS t2 ON t1.col1 = t2.nullableCol1" + }, + { + "description": "Joining with nullable float column", + "sql": "SET enableNullHandling = TRUE; WITH tableWithNull AS (SELECT cast(intCol1 as FLOAT) col1, CASE WHEN intCol1 > 0 THEN NULL ELSE cast(intCol1 as FLOAT) END AS nullableCol1 FROM {tbl1}) SELECT t1.col1 FROM tableWithNull AS t1 JOIN tableWithNull AS t2 ON t1.col1 = t2.nullableCol1", + "h2Sql": " WITH tableWithNull AS (SELECT cast(intCol1 as FLOAT) col1, CASE WHEN intCol1 > 0 THEN NULL ELSE cast(intCol1 as FLOAT) END AS nullableCol1 FROM {tbl1}) SELECT t1.col1 FROM tableWithNull AS t1 JOIN tableWithNull AS t2 ON t1.col1 = t2.nullableCol1" + }, + { + "description": "Joining with nullable double column", + "sql": "SET enableNullHandling = TRUE; WITH tableWithNull AS (SELECT cast(intCol1 as DOUBLE) col1, CASE WHEN intCol1 > 0 THEN NULL ELSE cast(intCol1 as DOUBLE) END AS nullableCol1 FROM {tbl1}) SELECT t1.col1 FROM tableWithNull AS t1 JOIN tableWithNull AS t2 ON t1.col1 = t2.nullableCol1", + "h2Sql": " WITH tableWithNull AS (SELECT cast(intCol1 as DOUBLE) col1, CASE WHEN intCol1 > 0 THEN NULL ELSE cast(intCol1 as DOUBLE) END AS nullableCol1 FROM {tbl1}) SELECT t1.col1 FROM tableWithNull AS t1 JOIN tableWithNull AS t2 ON t1.col1 = t2.nullableCol1" } ] }, --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscribe@pinot.apache.org For additional commands, e-mail: commits-help@pinot.apache.org