Jackie-Jiang commented on a change in pull request #7916:
URL: https://github.com/apache/pinot/pull/7916#discussion_r787207706



##########
File path: pinot-core/src/main/java/org/apache/pinot/core/operator/blocks/ProjectionBlock.java
##########
@@ -52,7 +52,7 @@ public BlockValSet getBlockValueSet(String column) {
 
   @Override
   public BlockDocIdSet getBlockDocIdSet() {
-    throw new UnsupportedOperationException();
+    return null;

Review comment:
      Revert this since it is no longer relevant; the original body is shown below for reference.
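   A minimal sketch of the reverted method, restoring the original body from the diff:
   ```java
   @Override
   public BlockDocIdSet getBlockDocIdSet() {
     // ProjectionBlock does not support doc-id set access, so fail fast instead of returning null
     throw new UnsupportedOperationException();
   }
   ```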

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -154,4 +129,168 @@ private static boolean isFitForDictionaryBasedPlan(AggregationFunction[] aggrega
     }
     return true;
   }
+
+  /**
+   * Build a FilteredAggregationOperator given the parameters.
+   * @param mainPredicateFilterOperator Filter operator corresponding to the main predicate
+   * @param mainTransformOperator Transform operator corresponding to the main predicate
+   * @param aggregationFunctions Aggregation functions in the query
+   * @param numTotalDocs Number of total docs
+   */
+  private BaseOperator<IntermediateResultsBlock> buildOperatorForFilteredAggregations(
+      BaseFilterOperator mainPredicateFilterOperator,

Review comment:
      (code format) Can you apply the latest [code format](https://docs.pinot.apache.org/developers/developers-and-contributors/code-setup#intellij) and reformat this file? Several places do not follow the code format. Also, can we reduce some of the empty lines in this method?

##########
File path: pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
##########
@@ -0,0 +1,512 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.queries;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.response.broker.BrokerResponseNative;
+import org.apache.pinot.common.response.broker.ResultTable;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
+import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
+import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
+import org.apache.pinot.segment.spi.ImmutableSegment;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.data.readers.RecordReader;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+
+public class FilteredAggregationsTest extends BaseQueriesTest {
+  private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "FilteredAggregationsTest");
+  private static final String TABLE_NAME = "MyTable";
+  private static final String FIRST_SEGMENT_NAME = "firstTestSegment";
+  private static final String SECOND_SEGMENT_NAME = "secondTestSegment";
+  private static final String INT_COL_NAME = "INT_COL";
+  private static final String NO_INDEX_INT_COL_NAME = "NO_INDEX_COL";
+  private static final String STATIC_INT_COL_NAME = "STATIC_INT_COL";
+  private static final Integer INT_BASE_VALUE = 0;
+  private static final Integer NUM_ROWS = 30000;
+
+
+  private IndexSegment _indexSegment;
+  private List<IndexSegment> _indexSegments;
+
+  @Override
+  protected String getFilter() {
+    return "";
+  }
+
+  @Override
+  protected IndexSegment getIndexSegment() {
+    return _indexSegment;
+  }
+
+  @Override
+  protected List<IndexSegment> getIndexSegments() {
+    return _indexSegments;
+  }
+
+  @BeforeClass
+  public void setUp()
+      throws Exception {
+    FileUtils.deleteQuietly(INDEX_DIR);
+
+    buildSegment(FIRST_SEGMENT_NAME);
+    buildSegment(SECOND_SEGMENT_NAME);
+    IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
+
+    Set<String> invertedIndexCols = new HashSet<>();
+    invertedIndexCols.add(INT_COL_NAME);
+
+    indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
+    ImmutableSegment firstImmutableSegment =
+        ImmutableSegmentLoader.load(new File(INDEX_DIR, FIRST_SEGMENT_NAME), indexLoadingConfig);
+    ImmutableSegment secondImmutableSegment =
+        ImmutableSegmentLoader.load(new File(INDEX_DIR, SECOND_SEGMENT_NAME), indexLoadingConfig);
+    _indexSegment = firstImmutableSegment;
+    _indexSegments = Arrays.asList(firstImmutableSegment, secondImmutableSegment);
+  }
+
+  @AfterClass
+  public void tearDown() {
+    _indexSegment.destroy();
+    FileUtils.deleteQuietly(INDEX_DIR);
+  }
+
+  private List<GenericRow> createTestData(int numRows) {
+    List<GenericRow> rows = new ArrayList<>();
+
+    for (int i = 0; i < numRows; i++) {
+      GenericRow row = new GenericRow();
+      row.putField(INT_COL_NAME, INT_BASE_VALUE + i);
+      row.putField(NO_INDEX_INT_COL_NAME, i);
+      row.putField(STATIC_INT_COL_NAME, 10);
+
+      rows.add(row);
+    }
+    return rows;
+  }
+
+  private void buildSegment(String segmentName)
+      throws Exception {
+    List<GenericRow> rows = createTestData(NUM_ROWS);
+    List<FieldConfig> fieldConfigs = new ArrayList<>();
+
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+        .setInvertedIndexColumns(Arrays.asList(INT_COL_NAME)).setFieldConfigList(fieldConfigs).build();
+    Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension(NO_INDEX_INT_COL_NAME, FieldSpec.DataType.INT)
+        .addSingleValueDimension(STATIC_INT_COL_NAME, FieldSpec.DataType.INT)
+        .addMetric(INT_COL_NAME, FieldSpec.DataType.INT).build();
+    SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema);
+    config.setOutDir(INDEX_DIR.getPath());
+    config.setTableName(TABLE_NAME);
+    config.setSegmentName(segmentName);
+
+    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
+    try (RecordReader recordReader = new GenericRowRecordReader(rows)) {
+      driver.init(config, recordReader);
+      driver.build();
+    }
+  }
+
+  private void testInterSegmentAggregationQueryHelper(String firstQuery, String secondQuery) {
+    // SQL
+    BrokerResponseNative firstBrokerResponseNative = getBrokerResponseForSqlQuery(firstQuery);
+    BrokerResponseNative secondBrokerResponseNative = getBrokerResponseForSqlQuery(secondQuery);
+    ResultTable firstResultTable = firstBrokerResponseNative.getResultTable();
+    ResultTable secondResultTable = secondBrokerResponseNative.getResultTable();
+    DataSchema firstDataSchema = firstResultTable.getDataSchema();
+    DataSchema secondDataSchema = secondResultTable.getDataSchema();
+
+    Assert.assertEquals(firstDataSchema.size(), secondDataSchema.size());
+
+    List<Object[]> firstSetOfRows = firstResultTable.getRows();
+    List<Object[]> secondSetOfRows = secondResultTable.getRows();
+
+    Assert.assertEquals(firstSetOfRows.size(), secondSetOfRows.size());
+
+    for (int i = 0; i < firstSetOfRows.size(); i++) {
+      Object[] firstSetRow = firstSetOfRows.get(i);
+      Object[] secondSetRow = secondSetOfRows.get(i);
+
+      Assert.assertEquals(firstSetRow.length, secondSetRow.length);
+
+      for (int j = 0; j < firstSetRow.length; j++) {
+        //System.out.println("FIRST " + firstSetRow[j] + " SECOND " + secondSetRow[j] + " j " + j);
+        Assert.assertEquals(firstSetRow[j], secondSetRow[j]);
+      }
+    }
+  }
+
+  @Test
+  public void testInterSegment() {
+
+  String query =
+        "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 9999)"
+            + "FROM MyTable WHERE INT_COL < 1000000";
+
+    String nonFilterQuery =
+        "SELECT SUM(INT_COL)"
+            + "FROM MyTable WHERE INT_COL > 9999 AND INT_COL < 1000000";
+
+    testInterSegmentAggregationQueryHelper(query, nonFilterQuery);
+
+    query = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 1234 AND INT_COL < 
22000)"
+        + "FROM MyTable";
+
+    nonFilterQuery = "SELECT SUM("

Review comment:
      (code style) Suggest reformatting the queries in this test to be more compact, e.g. as shown below.
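   For example, a more compact form of the first pair of queries (a sketch; note it also adds the space that is missing before `FROM` in the concatenated string literals):
   ```java
   String query = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 9999) FROM MyTable WHERE INT_COL < 1000000";
   String nonFilterQuery = "SELECT SUM(INT_COL) FROM MyTable WHERE INT_COL > 9999 AND INT_COL < 1000000";
   testInterSegmentAggregationQueryHelper(query, nonFilterQuery);
   ```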

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -62,57 +69,25 @@ public AggregationPlanNode(IndexSegment indexSegment, QueryContext queryContext)
   public Operator<IntermediateResultsBlock> run() {
     assert _queryContext.getAggregationFunctions() != null;
 
-    int numTotalDocs = _indexSegment.getSegmentMetadata().getTotalDocs();
-    AggregationFunction[] aggregationFunctions = _queryContext.getAggregationFunctions();
+    boolean hasFilteredPredicates = _queryContext.isHasFilteredAggregations();
 
-    FilterPlanNode filterPlanNode = new FilterPlanNode(_indexSegment, _queryContext);
-    BaseFilterOperator filterOperator = filterPlanNode.run();
+    Pair<FilterPlanNode, BaseFilterOperator> filterOperatorPair =

Review comment:
      What I meant is that we can branch these 2 cases earlier, because the optimizations for regular aggregation don't apply to filtered aggregation (e.g. the extra check on line 246; also, the star-tree should not be used for filtered aggregation, which is not checked properly in the current code). It is more readable if we completely split these 2 cases:
   ```
       if (hasFilteredPredicates) {
         return buildOperatorForFilteredAggregations();
       } else {
         return buildOperatorForNonFilteredAggregations();
       }
   ```
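   A minimal sketch of how `run()` could branch under this suggestion (the two build methods are the hypothetical split named above, not existing signatures):
   ```java
   @Override
   public Operator<IntermediateResultsBlock> run() {
     assert _queryContext.getAggregationFunctions() != null;
     if (_queryContext.isHasFilteredAggregations()) {
       // Filtered aggregations: skip the star-tree and other regular-aggregation optimizations
       return buildOperatorForFilteredAggregations();
     } else {
       return buildOperatorForNonFilteredAggregations();
     }
   }
   ```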

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/operator/blocks/TransformBlock.java
##########
@@ -43,6 +43,11 @@ public TransformBlock(ProjectionBlock projectionBlock,
     _transformFunctionMap = transformFunctionMap;
   }
 
+  protected TransformBlock(TransformBlock transformBlock) {

Review comment:
       Revert this file

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java
##########
@@ -90,9 +92,11 @@
 
  // Pre-calculate the aggregation functions and columns for the query so that it can be shared across all the segments
   private AggregationFunction[] _aggregationFunctions;
-  private List<Pair<AggregationFunction, FilterContext>> _filteredAggregationFunctions;
+

Review comment:
      I see your point, but my concern is that an aggregation function and a filter are logically two independent concepts, and embedding the filter into an aggregation function could cause confusion. If we need to associate extra attributes with an aggregation function, I'd suggest adding a wrapper class instead of implementing a special `AggregationFunction`.
   Based on the current implementation, I feel `Pair` itself should be enough (we only need to associate the `FilterContext` with the `AggregationFunction`). To maintain the order of the aggregations, we may add pairs with a `null` `FilterContext`, e.g. as sketched below.
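   A minimal sketch of the `Pair`-based association, assuming commons-lang3 `Pair` and the factory call shown in the diff (variable names are illustrative):
   ```java
   // Hypothetical: keep aggregations in SELECT order, pairing each with its
   // FilterContext; a null FilterContext marks a regular (unfiltered) aggregation.
   List<Pair<AggregationFunction, FilterContext>> aggregationsInOrder = new ArrayList<>();
   for (Pair<Pair<FilterContext, ExpressionContext>, FunctionContext> pair : aggregationsInSelect) {
     AggregationFunction function =
         AggregationFunctionFactory.getAggregationFunction(pair.getRight(), queryContext);
     FilterContext filter = pair.getLeft() == null ? null : pair.getLeft().getLeft();
     aggregationsInOrder.add(Pair.of(function, filter));
   }
   ```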

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java
##########
@@ -441,34 +471,54 @@ public QueryContext build() {
      */
     private void generateAggregationFunctions(QueryContext queryContext) {
       List<AggregationFunction> aggregationFunctions = new ArrayList<>();
-      List<Pair<AggregationFunction, FilterContext>> filteredAggregationFunctions = new ArrayList<>();
       Map<FunctionContext, Integer> aggregationFunctionIndexMap = new HashMap<>();
+      Map<Pair<FunctionContext, FilterContext>, Integer> filterExpressionIndexMap = new HashMap<>();
 
       // Add aggregation functions in the SELECT clause
       // NOTE: DO NOT deduplicate the aggregation functions in the SELECT clause because that involves protocol change.
-      List<FunctionContext> aggregationsInSelect = new ArrayList<>();
-      List<Pair<FunctionContext, FilterContext>> filteredAggregations = new ArrayList<>();
+      List<Pair<Pair<FilterContext, ExpressionContext>, FunctionContext>> aggregationsInSelect = new ArrayList<>();
       for (ExpressionContext selectExpression : queryContext._selectExpressions) {
-        getAggregations(selectExpression, aggregationsInSelect, filteredAggregations);
+        getAggregations(selectExpression, aggregationsInSelect);
       }
-      for (FunctionContext function : aggregationsInSelect) {
+      for (Pair<Pair<FilterContext, ExpressionContext>, FunctionContext> pair : aggregationsInSelect) {
+        FunctionContext function = pair.getRight();
         int functionIndex = aggregationFunctions.size();
         AggregationFunction aggregationFunction =
             AggregationFunctionFactory.getAggregationFunction(function, queryContext);
+
+        // Hack: If the left pair is not null, implies a filtered aggregation

Review comment:
      Revise this comment? We should not have hacks in production code.

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -154,4 +129,168 @@ private static boolean isFitForDictionaryBasedPlan(AggregationFunction[] aggrega
     }
     return true;
   }
+
+  /**
+   * Build a FilteredAggregationOperator given the parameters.
+   * @param mainPredicateFilterOperator Filter operator corresponding to the main predicate
+   * @param mainTransformOperator Transform operator corresponding to the main predicate
+   * @param aggregationFunctions Aggregation functions in the query
+   * @param numTotalDocs Number of total docs
+   */
+  private BaseOperator<IntermediateResultsBlock> buildOperatorForFilteredAggregations(
+      BaseFilterOperator mainPredicateFilterOperator,
+      TransformOperator mainTransformOperator,
+      AggregationFunction[] aggregationFunctions, int numTotalDocs) {
+    Map<ExpressionContext, Pair<List<AggregationFunction>, TransformOperator>> expressionContextToAggFuncsMap =
+        new HashMap<>();
+    List<AggregationFunction> nonFilteredAggregationFunctions = new ArrayList<>();
+
+    // For each aggregation function, check if the aggregation function is a filtered agg.
+    // If it is, populate the corresponding filter operator and corresponding transform operator
+    for (AggregationFunction aggregationFunction : aggregationFunctions) {
+      if (aggregationFunction instanceof FilterableAggregationFunction) {
+        FilterableAggregationFunction filterableAggregationFunction =
+            (FilterableAggregationFunction) aggregationFunction;
+
+        ExpressionContext currentFilterExpression = filterableAggregationFunction

Review comment:
      The `currentFilterExpression` seems redundant. You may directly use the `filterContext` as the key, e.g. as sketched below.
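   A minimal sketch of keying directly by the filter (assuming `FilterableAggregationFunction` exposes a `getFilterContext()` accessor and `FilterContext` implements `equals`/`hashCode`):
   ```java
   // Hypothetical: map each distinct FilterContext to the aggregations that use it,
   // instead of going through a separate ExpressionContext key.
   Map<FilterContext, List<AggregationFunction>> filterToAggFuncsMap = new HashMap<>();
   FilterContext filterContext = filterableAggregationFunction.getFilterContext();
   filterToAggFuncsMap.computeIfAbsent(filterContext, k -> new ArrayList<>()).add(aggregationFunction);
   ```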

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/FilterableAggregationFunction.java
##########
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.FilterContext;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+
+/**
+ * Represents a filtered aggregation
+ */
+public class FilterableAggregationFunction implements AggregationFunction<Object, Comparable> {
+  private AggregationFunction<Object, Comparable> _innerAggregationFunction;
+  private ExpressionContext _associatedExpressionContext;
+  private FilterContext _filterContext;
+
+  public FilterableAggregationFunction(AggregationFunction aggregationFunction,
+      ExpressionContext associatedExpressionContext, FilterContext filterContext) {
+    _innerAggregationFunction = aggregationFunction;
+    _associatedExpressionContext = associatedExpressionContext;
+    _filterContext = filterContext;
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return _innerAggregationFunction.getType();
+  }
+
+  @Override
+  public String getColumnName() {
+    return _innerAggregationFunction.getColumnName();
+  }
+
+  @Override
+  public String getResultColumnName() {
+    return _innerAggregationFunction.getResultColumnName();
+  }
+
+  @Override
+  public List<ExpressionContext> getInputExpressions() {
+    return _innerAggregationFunction.getInputExpressions();
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return _innerAggregationFunction.createAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, int maxCapacity) {
+    return _innerAggregationFunction.createGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    _innerAggregationFunction.aggregate(length, aggregationResultHolder, blockValSetMap);
+  }
+
+  @Override
+  public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    _innerAggregationFunction.aggregateGroupBySV(length, groupKeyArray, groupByResultHolder,
+        blockValSetMap);
+  }
+
+  @Override
+  public void aggregateGroupByMV(int length, int[][] groupKeysArray, GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    _innerAggregationFunction.aggregateGroupByMV(length, groupKeysArray, groupByResultHolder,
+        blockValSetMap);
+  }
+
+  @Override
+  public Object extractAggregationResult(AggregationResultHolder aggregationResultHolder) {
+    return _innerAggregationFunction.extractAggregationResult(aggregationResultHolder);
+  }
+
+  @Override
+  public Object extractGroupByResult(GroupByResultHolder groupByResultHolder, int groupKey) {
+    return _innerAggregationFunction.extractGroupByResult(groupByResultHolder, groupKey);
+  }
+
+  @Override
+  public Object merge(Object intermediateResult1, Object intermediateResult2) {
+    return _innerAggregationFunction.merge(intermediateResult1, intermediateResult2);
+  }
+
+  @Override
+  public DataSchema.ColumnDataType getIntermediateResultColumnType() {
+    return _innerAggregationFunction.getIntermediateResultColumnType();
+  }
+
+  @Override
+  public DataSchema.ColumnDataType getFinalResultColumnType() {
+    return _innerAggregationFunction.getFinalResultColumnType();
+  }
+
+  @Override
+  public Comparable extractFinalResult(Object o) {
+    return _innerAggregationFunction.extractFinalResult(o);
+  }
+
+  @Override
+  public String toExplainString() {
+    return null;

Review comment:
       ^^

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -154,4 +129,168 @@ private static boolean isFitForDictionaryBasedPlan(AggregationFunction[] aggrega
     }
     return true;
   }
+
+  /**
+   * Build a FilteredAggregationOperator given the parameters.
+   * @param mainPredicateFilterOperator Filter operator corresponding to the main predicate
+   * @param mainTransformOperator Transform operator corresponding to the main predicate
+   * @param aggregationFunctions Aggregation functions in the query
+   * @param numTotalDocs Number of total docs
+   */
+  private BaseOperator<IntermediateResultsBlock> buildOperatorForFilteredAggregations(
+      BaseFilterOperator mainPredicateFilterOperator,
+      TransformOperator mainTransformOperator,
+      AggregationFunction[] aggregationFunctions, int numTotalDocs) {
+    Map<ExpressionContext, Pair<List<AggregationFunction>, TransformOperator>> expressionContextToAggFuncsMap =
+        new HashMap<>();
+    List<AggregationFunction> nonFilteredAggregationFunctions = new ArrayList<>();
+
+    // For each aggregation function, check if the aggregation function is a filtered agg.
+    // If it is, populate the corresponding filter operator and corresponding transform operator
+    for (AggregationFunction aggregationFunction : aggregationFunctions) {
+      if (aggregationFunction instanceof FilterableAggregationFunction) {
+        FilterableAggregationFunction filterableAggregationFunction =
+            (FilterableAggregationFunction) aggregationFunction;
+
+        ExpressionContext currentFilterExpression = filterableAggregationFunction
+            .getAssociatedExpressionContext();
+
+        if (expressionContextToAggFuncsMap.get(currentFilterExpression) != null) {
+          expressionContextToAggFuncsMap.get(currentFilterExpression).getLeft().add(aggregationFunction);

Review comment:
      (Major) I think the `TransformOperator` cannot be shared among multiple aggregations. Once it iterates over a block, it won't process the same block again for the next aggregation. Let's add some test queries with multiple aggregations on the same filter, e.g. as sketched below.
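   For example (a sketch reusing the columns and helper from `FilteredAggregationsTest`):
   ```java
   // Two aggregations on the same filter: a naively shared TransformOperator would be
   // exhausted after the first aggregation consumed its blocks.
   String query = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 9999), "
       + "MAX(INT_COL) FILTER(WHERE INT_COL > 9999) FROM MyTable";
   String nonFilterQuery = "SELECT SUM(INT_COL), MAX(INT_COL) FROM MyTable WHERE INT_COL > 9999";
   testInterSegmentAggregationQueryHelper(query, nonFilterQuery);
   ```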




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


