yashmayya commented on code in PR #16990:
URL: https://github.com/apache/pinot/pull/16990#discussion_r2418074321
##########
pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/set/UnionOperator.java:
##########
@@ -33,53 +35,57 @@
/**
* Union operator for UNION queries. Unlike {@link UnionAllOperator}, this
operator removes duplicate rows and only
- * returns distinct rows.
+ * returns distinct rows. Each child operator is fully drained sequentially
and distinct rows are returned.
*/
-public class UnionOperator extends RightRowSetBasedSetOperator {
+public class UnionOperator extends SetOperator {
private static final Logger LOGGER =
LoggerFactory.getLogger(UnionOperator.class);
private static final String EXPLAIN_NAME = "UNION";
+ private MseBlock _eosBlock = null;
+ private int _currentOperatorIndex = 0;
+ private final Set<Record> _seenRecords = new ObjectOpenHashSet<>();
+
public UnionOperator(OpChainExecutionContext opChainExecutionContext,
List<MultiStageOperator> inputOperators, DataSchema dataSchema) {
super(opChainExecutionContext, inputOperators, dataSchema);
}
@Override
- protected MseBlock processRightOperator() {
- MseBlock block = _rightChildOperator.nextBlock();
- while (block.isData()) {
- MseBlock.Data dataBlock = (MseBlock.Data) block;
- List<Object[]> rows = new ArrayList<>();
- for (Object[] row : dataBlock.asRowHeap().getRows()) {
- Record record = new Record(row);
- if (!_rightRowSet.contains(record)) {
- // Add a new unique row.
- rows.add(row);
- _rightRowSet.add(record);
+ protected MseBlock getNextBlock()
+ throws Exception {
+ if (_eosBlock != null) {
+ return _eosBlock;
+ }
+
+ while (_currentOperatorIndex < _inputOperators.size()) {
+ MultiStageOperator currentOperator =
_inputOperators.get(_currentOperatorIndex);
+ MseBlock block = currentOperator.nextBlock();
+ if (block.isError()) {
+ _eosBlock = block;
+ return block;
+ } else if (block.isSuccess()) {
+ _currentOperatorIndex++;
+ if (_currentOperatorIndex == _inputOperators.size()) {
+ _eosBlock = block;
+ return block;
+ }
+ } else if (block.isData()) {
+ List<Object[]> rows = new ArrayList<>();
+ for (Object[] row : ((MseBlock.Data) block).asRowHeap().getRows()) {
+ Record record = new Record(row);
+ if (_seenRecords.add(record)) {
+ rows.add(row);
+ }
+ }
Review Comment:
Wrapping every row in a `Record` and probing the hash set row by row is
probably quite inefficient, and we might want to revisit optimizing the
distinct union logic in the future.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]