Jackie-Jiang commented on a change in pull request #5661: URL: https://github.com/apache/incubator-pinot/pull/5661#discussion_r451282387
########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/query/SelectionOrderByOperator.java ########## @@ -143,24 +168,145 @@ public SelectionOrderByOperator(IndexSegment indexSegment, QueryContext queryCon @Override protected IntermediateResultsBlock getNextBlock() { + if (_expressions.size() == _orderByExpressions.size()) { + return computeAllOrdered(); + } else { + return computePartiallyOrdered(); + } + } + + /** + * Helper method to compute the result when all the output expressions are ordered. + */ + private IntermediateResultsBlock computeAllOrdered() { + int numExpressions = _expressions.size(); + + // Fetch all the expressions and insert them into the priority queue + BlockValSet[] blockValSets = new BlockValSet[numExpressions]; TransformBlock transformBlock; while ((transformBlock = _transformOperator.nextBlock()) != null) { - int numExpressions = _expressions.size(); - BlockValSet[] blockValSets = new BlockValSet[numExpressions]; for (int i = 0; i < numExpressions; i++) { ExpressionContext expression = _expressions.get(i); blockValSets[i] = transformBlock.getBlockValueSet(expression); } RowBasedBlockValueFetcher blockValueFetcher = new RowBasedBlockValueFetcher(blockValSets); - int numDocsFetched = transformBlock.getNumDocs(); _numDocsScanned += numDocsFetched; for (int i = 0; i < numDocsFetched; i++) { SelectionOperatorUtils.addToPriorityQueue(blockValueFetcher.getRow(i), _rows, _numRowsToKeep); } } + _numEntriesScannedPostFilter = (long) _numDocsScanned * _transformOperator.getNumColumnsProjected(); - return new IntermediateResultsBlock(_dataSchema, _rows); + // Create the data schema + String[] columnNames = new String[numExpressions]; + DataSchema.ColumnDataType[] columnDataTypes = new DataSchema.ColumnDataType[numExpressions]; + for (int i = 0; i < numExpressions; i++) { + columnNames[i] = _expressions.get(i).toString(); + TransformResultMetadata expressionMetadata = _orderByExpressionMetadata[i]; + columnDataTypes[i] = + 
DataSchema.ColumnDataType.fromDataType(expressionMetadata.getDataType(), expressionMetadata.isSingleValue()); + } + DataSchema dataSchema = new DataSchema(columnNames, columnDataTypes); + + return new IntermediateResultsBlock(dataSchema, _rows); + } + + /** + * Helper method to compute the result when not all the output expressions are ordered. + */ + private IntermediateResultsBlock computePartiallyOrdered() { Review comment: That is possible, and could save some reads for high-LIMIT queries (which are always expensive), but it requires quite a big change because it breaks the assumption that one operator processes only one segment. After combining the order-by expressions, we need to reopen the segments to read the values. For the second-round scan, we might also need multi-threading similar to the first round. Not sure if the optimization is worth the overhead. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org