[GitHub] [pinot] Jackie-Jiang commented on a change in pull request #8408: Combine Metadata and Dictionary based plans into single operator

GitBox Tue, 29 Mar 2022 13:23:07 -0700


Jackie-Jiang commented on a change in pull request #8408:
URL: https://github.com/apache/pinot/pull/8408#discussion_r837877827




##########
File path: pinot-core/src/main/java/org/apache/pinot/core/operator/query/NonScanBasedAggregationOperator.java
##########
@@ -40,80 +39,90 @@
 import 
org.apache.pinot.core.query.aggregation.function.DistinctCountRawHLLAggregationFunction;
 import 
org.apache.pinot.core.query.aggregation.function.DistinctCountSmartHLLAggregationFunction;
 import org.apache.pinot.segment.local.customobject.MinMaxRangePair;
+import org.apache.pinot.segment.spi.datasource.DataSource;
 import org.apache.pinot.segment.spi.index.reader.Dictionary;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.utils.ByteArray;
 
 
 /**
- * Aggregation operator that utilizes dictionary for serving aggregation 
queries.
- * The dictionary operator is selected in the plan maker, if the query is of 
aggregation type min, max, minmaxrange
- * and the column has a dictionary.
+ * Aggregation operator that utilizes dictionary or column metadata for 
serving aggregation queries to avoid scanning.
+ * The scanless operator is selected in the plan maker, if the query is of 
aggregation type min, max, minmaxrange,
+ * distinctcount, distinctcounthll, distinctcountrawhll, 
segmentpartitioneddistinctcount, distinctcountsmarthll,
+ * and the column has a dictionary, or has column metadata with min and max 
value defined. It also supports count(*) if
+ * the query has no filter.
  * We don't use this operator if the segment has star tree,
- * as the dictionary will have aggregated values for the metrics, and 
dimensions will have star node value
+ * as the dictionary will have aggregated values for the metrics, and 
dimensions will have star node value.
  *
- * For min value, we use the first value from the dictionary
- * For max value we use the last value from dictionary
+ * For min value, we use the first value from the dictionary, falling back to 
the column metadata min value if there
+ * is no dictionary.
+ * For max value we use the last value from dictionary, falling back to the 
column metadata max value if there
+ * is no dictionary.
  */
 @SuppressWarnings("rawtypes")
-public class DictionaryBasedAggregationOperator extends 
BaseOperator<IntermediateResultsBlock> {
-  private static final String OPERATOR_NAME = 
"DictionaryBasedAggregationOperator";
-  private static final String EXPLAIN_NAME = "AGGREGATE_DICTIONARY";
+public class NonScanBasedAggregationOperator extends 
BaseOperator<IntermediateResultsBlock> {
+  private static final String OPERATOR_NAME = 
NonScanBasedAggregationOperator.class.getSimpleName();
+  private static final String EXPLAIN_NAME = "AGGREGATE_NO_SCAN";
 
   private final AggregationFunction[] _aggregationFunctions;
-  private final Map<String, Dictionary> _dictionaryMap;
+  private final DataSource[] _dataSources;
   private final int _numTotalDocs;
 
-  public DictionaryBasedAggregationOperator(AggregationFunction[] 
aggregationFunctions,
-      Map<String, Dictionary> dictionaryMap, int numTotalDocs) {
+  public NonScanBasedAggregationOperator(AggregationFunction[] 
aggregationFunctions,
+      DataSource[] dataSources, int numTotalDocs) {
     _aggregationFunctions = aggregationFunctions;
-    _dictionaryMap = dictionaryMap;
+    _dataSources = dataSources;
     _numTotalDocs = numTotalDocs;
   }
 
   @Override
   protected IntermediateResultsBlock getNextBlock() {
     List<Object> aggregationResults = new 
ArrayList<>(_aggregationFunctions.length);
-    for (AggregationFunction aggregationFunction : _aggregationFunctions) {
-      String column = ((ExpressionContext) 
aggregationFunction.getInputExpressions().get(0)).getIdentifier();
-      Dictionary dictionary = _dictionaryMap.get(column);
+    for (int i = 0; i < _aggregationFunctions.length; i++) {
+      AggregationFunction aggregationFunction = _aggregationFunctions[i];
+      // note that dataSource will be null for COUNT, so do not interact with 
it until it's known this isn't a COUNT
+      DataSource dataSource = _dataSources[i];
       Object result;
       switch (aggregationFunction.getType()) {
+        case COUNT:
+          result = (long) _numTotalDocs;
+          break;
         case MIN:
         case MINMV:
-          result = toDouble(dictionary.getMinVal());
+          result = getMinValue(dataSource);
           break;
         case MAX:
         case MAXMV:
-          result = toDouble(dictionary.getMaxVal());
+          result = getMaxValue(dataSource);
           break;
         case MINMAXRANGE:
         case MINMAXRANGEMV:
-          result = new MinMaxRangePair(toDouble(dictionary.getMinVal()), 
toDouble(dictionary.getMaxVal()));
+          result = new MinMaxRangePair(getMinValue(dataSource), 
getMaxValue(dataSource));
           break;
         case DISTINCTCOUNT:
         case DISTINCTCOUNTMV:
-          result = getDistinctValueSet(dictionary);
+          result = 
getDistinctValueSet(Objects.requireNonNull(dataSource.getDictionary()));
           break;
         case DISTINCTCOUNTHLL:
         case DISTINCTCOUNTHLLMV:
-          result = getDistinctCountHLLResult(dictionary, 
(DistinctCountHLLAggregationFunction) aggregationFunction);
+          result = 
getDistinctCountHLLResult(Objects.requireNonNull(dataSource.getDictionary()),
+              (DistinctCountHLLAggregationFunction) aggregationFunction);
           break;
         case DISTINCTCOUNTRAWHLL:
         case DISTINCTCOUNTRAWHLLMV:
-          result = getDistinctCountHLLResult(dictionary,
+          result = 
getDistinctCountHLLResult(Objects.requireNonNull(dataSource.getDictionary()),
               ((DistinctCountRawHLLAggregationFunction) 
aggregationFunction).getDistinctCountHLLAggregationFunction());
           break;
         case SEGMENTPARTITIONEDDISTINCTCOUNT:
-          result = (long) dictionary.length();
+          result = (long) 
Objects.requireNonNull(dataSource.getDictionary()).length();
           break;
         case DISTINCTCOUNTSMARTHLL:
-          result = getDistinctCountSmartHLLResult(dictionary,
+          result = 
getDistinctCountSmartHLLResult(Objects.requireNonNull(dataSource.getDictionary()),
               (DistinctCountSmartHLLAggregationFunction) aggregationFunction);
           break;
         default:
           throw new IllegalStateException(
-              "Dictionary based aggregation operator does not support function 
type: " + aggregationFunction.getType());
+              "DataSource based aggregation operator does not support function 
type: " + aggregationFunction.getType());

Review comment:
       (minor)
   ```suggestion
                 "Non-scan based aggregation operator does not support function 
type: " + aggregationFunction.getType());
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

[GitHub] [pinot] Jackie-Jiang commented on a change in pull request #8408: Combine Metadata and Dictionary based plans into single operator

Reply via email to