yashmayya commented on code in PR #16038: URL: https://github.com/apache/pinot/pull/16038#discussion_r2135359159
##########
pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/HashJoinOperator.java:
##########
@@ -98,8 +104,36 @@ public String toExplainString() {
   protected void addRowsToRightTable(List<Object[]> rows) {
     assert _rightTable != null : "Right table should not be null when adding rows";
     for (Object[] row : rows) {
-      _rightTable.addRow(_rightKeySelector.getKey(row), row);
+      Object key = _rightKeySelector.getKey(row);
+      // Skip rows with null join keys - they should not participate in equi-joins per SQL standard
+      if (isNullKey(key)) {
+        // For RIGHT and FULL JOIN, we need to preserve null key rows for the final output
+        if (_nullKeyRightRows != null) {
+          _nullKeyRightRows.add(row);
+        }
+        continue;
+      }
+      _rightTable.addRow(key, row);
+    }
+  }
+
+  /**
+   * Check if a join key contains null values. In SQL standard, null keys should not match in equi-joins.
+   **/
+  private boolean isNullKey(Object key) {
+    if (key == null) {
+      return true;
     }
+    // For composite keys (Object[]), check if any component is null
+    if (key instanceof Object[]) {

Review Comment:
   Isn't the key going to be an instance of `org.apache.pinot.core.data.table.Key` for the composite key case (see `MultiColumnKeySelector`)?

##########
pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/HashJoinOperator.java:
##########
@@ -98,8 +104,36 @@ public String toExplainString() {
   protected void addRowsToRightTable(List<Object[]> rows) {
     assert _rightTable != null : "Right table should not be null when adding rows";
     for (Object[] row : rows) {
-      _rightTable.addRow(_rightKeySelector.getKey(row), row);
+      Object key = _rightKeySelector.getKey(row);
+      // Skip rows with null join keys - they should not participate in equi-joins per SQL standard
+      if (isNullKey(key)) {
+        // For RIGHT and FULL JOIN, we need to preserve null key rows for the final output
+        if (_nullKeyRightRows != null) {
+          _nullKeyRightRows.add(row);
+        }
+        continue;
+      }
+      _rightTable.addRow(key, row);
+    }
+  }
+
+  /**
+   * Check if a join key contains null values. In SQL standard, null keys should not match in equi-joins.
+   **/
+  private boolean isNullKey(Object key) {
+    if (key == null) {
+      return true;
     }
+    // For composite keys (Object[]), check if any component is null
+    if (key instanceof Object[]) {

Review Comment:
   Let's also add a unit test for this case.

##########
pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/HashJoinOperator.java:
##########
@@ -59,16 +60,21 @@ public class HashJoinOperator extends BaseJoinOperator {
   // TODO: Optimize this
   @Nullable
   private Map<Object, BitSet> _matchedRightRows;
+  // Store null key rows separately for RIGHT and FULL JOINs
+  @Nullable
+  private List<Object[]> _nullKeyRightRows;
 
   public HashJoinOperator(OpChainExecutionContext context, MultiStageOperator leftInput, DataSchema leftSchema,
-      MultiStageOperator rightInput, JoinNode node) {
+                          MultiStageOperator rightInput, JoinNode node) {
     super(context, leftInput, leftSchema, rightInput, node);
     List<Integer> leftKeys = node.getLeftKeys();
     Preconditions.checkState(!leftKeys.isEmpty(), "Hash join operator requires join keys");
     _leftKeySelector = KeySelectorFactory.getKeySelector(leftKeys);
     _rightKeySelector = KeySelectorFactory.getKeySelector(node.getRightKeys());
     _rightTable = createLookupTable(leftKeys, leftSchema);
     _matchedRightRows = needUnmatchedRightRows() ? new HashMap<>() : null;
+    // Initialize _nullKeyRightRows for both RIGHT and FULL JOINs
+    _nullKeyRightRows = (_joinType == JoinRelType.RIGHT || _joinType == JoinRelType.FULL) ? new ArrayList<>() : null;

Review Comment:
   We should use the `needUnmatchedRightRows` method from the parent `BaseJoinOperator` class instead.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org