Jackie-Jiang commented on a change in pull request #8408: URL: https://github.com/apache/pinot/pull/8408#discussion_r837877827
########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/query/NonScanBasedAggregationOperator.java ########## @@ -40,80 +39,90 @@ import org.apache.pinot.core.query.aggregation.function.DistinctCountRawHLLAggregationFunction; import org.apache.pinot.core.query.aggregation.function.DistinctCountSmartHLLAggregationFunction; import org.apache.pinot.segment.local.customobject.MinMaxRangePair; +import org.apache.pinot.segment.spi.datasource.DataSource; import org.apache.pinot.segment.spi.index.reader.Dictionary; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.utils.ByteArray; /** - * Aggregation operator that utilizes dictionary for serving aggregation queries. - * The dictionary operator is selected in the plan maker, if the query is of aggregation type min, max, minmaxrange - * and the column has a dictionary. + * Aggregation operator that utilizes dictionary or column metadata for serving aggregation queries to avoid scanning. + * The scanless operator is selected in the plan maker, if the query is of aggregation type min, max, minmaxrange, + * distinctcount, distinctcounthll, distinctcountrawhll, segmentpartitioneddistinctcount, distinctcountsmarthll, + * and the column has a dictionary, or has column metadata with min and max value defined. It also supports count(*) if + * the query has no filter. * We don't use this operator if the segment has star tree, - * as the dictionary will have aggregated values for the metrics, and dimensions will have star node value + * as the dictionary will have aggregated values for the metrics, and dimensions will have star node value. * - * For min value, we use the first value from the dictionary - * For max value we use the last value from dictionary + * For min value, we use the first value from the dictionary, falling back to the column metadata min value if there + * is no dictionary. + * For max value we use the last value from dictionary, falling back to the column metadata max value if there + * is no dictionary. */ @SuppressWarnings("rawtypes") -public class DictionaryBasedAggregationOperator extends BaseOperator<IntermediateResultsBlock> { - private static final String OPERATOR_NAME = "DictionaryBasedAggregationOperator"; - private static final String EXPLAIN_NAME = "AGGREGATE_DICTIONARY"; +public class NonScanBasedAggregationOperator extends BaseOperator<IntermediateResultsBlock> { + private static final String OPERATOR_NAME = NonScanBasedAggregationOperator.class.getSimpleName(); + private static final String EXPLAIN_NAME = "AGGREGATE_NO_SCAN"; private final AggregationFunction[] _aggregationFunctions; - private final Map<String, Dictionary> _dictionaryMap; + private final DataSource[] _dataSources; private final int _numTotalDocs; - public DictionaryBasedAggregationOperator(AggregationFunction[] aggregationFunctions, - Map<String, Dictionary> dictionaryMap, int numTotalDocs) { + public NonScanBasedAggregationOperator(AggregationFunction[] aggregationFunctions, + DataSource[] dataSources, int numTotalDocs) { _aggregationFunctions = aggregationFunctions; - _dictionaryMap = dictionaryMap; + _dataSources = dataSources; _numTotalDocs = numTotalDocs; } @Override protected IntermediateResultsBlock getNextBlock() { List<Object> aggregationResults = new ArrayList<>(_aggregationFunctions.length); - for (AggregationFunction aggregationFunction : _aggregationFunctions) { - String column = ((ExpressionContext) aggregationFunction.getInputExpressions().get(0)).getIdentifier(); - Dictionary dictionary = _dictionaryMap.get(column); + for (int i = 0; i < _aggregationFunctions.length; i++) { + AggregationFunction aggregationFunction = _aggregationFunctions[i]; + // note that dataSource will be null for COUNT, sp do not interact with it until it's known this isn't a COUNT + DataSource dataSource = _dataSources[i]; Object result; switch (aggregationFunction.getType()) { + case COUNT: + result = (long) _numTotalDocs; + break; case MIN: case MINMV: - result = toDouble(dictionary.getMinVal()); + result = getMinValue(dataSource); break; case MAX: case MAXMV: - result = toDouble(dictionary.getMaxVal()); + result = getMaxValue(dataSource); break; case MINMAXRANGE: case MINMAXRANGEMV: - result = new MinMaxRangePair(toDouble(dictionary.getMinVal()), toDouble(dictionary.getMaxVal())); + result = new MinMaxRangePair(getMinValue(dataSource), getMaxValue(dataSource)); break; case DISTINCTCOUNT: case DISTINCTCOUNTMV: - result = getDistinctValueSet(dictionary); + result = getDistinctValueSet(Objects.requireNonNull(dataSource.getDictionary())); break; case DISTINCTCOUNTHLL: case DISTINCTCOUNTHLLMV: - result = getDistinctCountHLLResult(dictionary, (DistinctCountHLLAggregationFunction) aggregationFunction); + result = getDistinctCountHLLResult(Objects.requireNonNull(dataSource.getDictionary()), + (DistinctCountHLLAggregationFunction) aggregationFunction); break; case DISTINCTCOUNTRAWHLL: case DISTINCTCOUNTRAWHLLMV: - result = getDistinctCountHLLResult(dictionary, + result = getDistinctCountHLLResult(Objects.requireNonNull(dataSource.getDictionary()), ((DistinctCountRawHLLAggregationFunction) aggregationFunction).getDistinctCountHLLAggregationFunction()); break; case SEGMENTPARTITIONEDDISTINCTCOUNT: - result = (long) dictionary.length(); + result = (long) Objects.requireNonNull(dataSource.getDictionary()).length(); break; case DISTINCTCOUNTSMARTHLL: - result = getDistinctCountSmartHLLResult(dictionary, + result = getDistinctCountSmartHLLResult(Objects.requireNonNull(dataSource.getDictionary()), (DistinctCountSmartHLLAggregationFunction) aggregationFunction); break; default: throw new IllegalStateException( - "Dictionary based aggregation operator does not support function type: " + aggregationFunction.getType()); + "DataSource based aggregation operator does not support function type: " + aggregationFunction.getType()); Review comment: (minor) ```suggestion "Non-scan based aggregation operator does not support function type: " + aggregationFunction.getType()); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org