Jackie-Jiang commented on a change in pull request #7916:
URL: https://github.com/apache/pinot/pull/7916#discussion_r787207706
##########
File path: pinot-core/src/main/java/org/apache/pinot/core/operator/blocks/ProjectionBlock.java
##########
@@ -52,7 +52,7 @@ public BlockValSet getBlockValueSet(String column) {
   @Override
   public BlockDocIdSet getBlockDocIdSet() {
-    throw new UnsupportedOperationException();
+    return null;

Review comment:
Revert this since it is no longer relevant.

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -154,4 +129,168 @@ private static boolean isFitForDictionaryBasedPlan(AggregationFunction[] aggrega
     }
     return true;
   }
+
+  /**
+   * Build a FilteredAggregationOperator given the parameters.
+   * @param mainPredicateFilterOperator Filter operator corresponding to the main predicate
+   * @param mainTransformOperator Transform operator corresponding to the main predicate
+   * @param aggregationFunctions Aggregation functions in the query
+   * @param numTotalDocs Number of total docs
+   */
+  private BaseOperator<IntermediateResultsBlock> buildOperatorForFilteredAggregations(
+      BaseFilterOperator mainPredicateFilterOperator,

Review comment:
(code format) Can you apply the latest [code format](https://docs.pinot.apache.org/developers/developers-and-contributors/code-setup#intellij) and reformat this file? Several places do not follow the code format. Also, can we reduce some empty lines in this method?

##########
File path: pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
##########
@@ -0,0 +1,512 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.queries;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.response.broker.BrokerResponseNative;
+import org.apache.pinot.common.response.broker.ResultTable;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
+import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
+import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
+import org.apache.pinot.segment.spi.ImmutableSegment;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.data.readers.RecordReader;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+
+public class FilteredAggregationsTest extends BaseQueriesTest {
+  private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "FilteredAggregationsTest");
+  private static final String TABLE_NAME = "MyTable";
+  private static final String FIRST_SEGMENT_NAME = "firstTestSegment";
+  private static final String SECOND_SEGMENT_NAME = "secondTestSegment";
+  private static final String INT_COL_NAME = "INT_COL";
+  private static final String NO_INDEX_INT_COL_NAME = "NO_INDEX_COL";
+  private static final String STATIC_INT_COL_NAME = "STATIC_INT_COL";
+  private static final Integer INT_BASE_VALUE = 0;
+  private static final Integer NUM_ROWS = 30000;
+
+  private IndexSegment _indexSegment;
+  private List<IndexSegment> _indexSegments;
+
+  @Override
+  protected String getFilter() {
+    return "";
+  }
+
+  @Override
+  protected IndexSegment getIndexSegment() {
+    return _indexSegment;
+  }
+
+  @Override
+  protected List<IndexSegment> getIndexSegments() {
+    return _indexSegments;
+  }
+
+  @BeforeClass
+  public void setUp()
+      throws Exception {
+    FileUtils.deleteQuietly(INDEX_DIR);
+
+    buildSegment(FIRST_SEGMENT_NAME);
+    buildSegment(SECOND_SEGMENT_NAME);
+    IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
+
+    Set<String> invertedIndexCols = new HashSet<>();
+    invertedIndexCols.add(INT_COL_NAME);
+
+    indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
+    ImmutableSegment firstImmutableSegment =
+        ImmutableSegmentLoader.load(new File(INDEX_DIR, FIRST_SEGMENT_NAME), indexLoadingConfig);
+    ImmutableSegment secondImmutableSegment =
+        ImmutableSegmentLoader.load(new File(INDEX_DIR, SECOND_SEGMENT_NAME), indexLoadingConfig);
+    _indexSegment = firstImmutableSegment;
+    _indexSegments = Arrays.asList(firstImmutableSegment, secondImmutableSegment);
+  }
+
+  @AfterClass
+  public void tearDown() {
+    _indexSegment.destroy();
+    FileUtils.deleteQuietly(INDEX_DIR);
+  }
+
+  private List<GenericRow> createTestData(int numRows) {
+    List<GenericRow> rows = new ArrayList<>();
+
+    for (int i = 0; i < numRows; i++) {
+      GenericRow row = new GenericRow();
+      row.putField(INT_COL_NAME, INT_BASE_VALUE + i);
+      row.putField(NO_INDEX_INT_COL_NAME, i);
+      row.putField(STATIC_INT_COL_NAME, 10);
+
+      rows.add(row);
+    }
+    return rows;
+  }
+
+  private void buildSegment(String segmentName)
+      throws Exception {
+    List<GenericRow> rows = createTestData(NUM_ROWS);
+    List<FieldConfig> fieldConfigs = new ArrayList<>();
+
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+        .setInvertedIndexColumns(Arrays.asList(INT_COL_NAME)).setFieldConfigList(fieldConfigs).build();
+    Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension(NO_INDEX_INT_COL_NAME, FieldSpec.DataType.INT)
+        .addSingleValueDimension(STATIC_INT_COL_NAME, FieldSpec.DataType.INT)
+        .addMetric(INT_COL_NAME, FieldSpec.DataType.INT).build();
+    SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema);
+    config.setOutDir(INDEX_DIR.getPath());
+    config.setTableName(TABLE_NAME);
+    config.setSegmentName(segmentName);
+
+    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
+    try (RecordReader recordReader = new GenericRowRecordReader(rows)) {
+      driver.init(config, recordReader);
+      driver.build();
+    }
+  }
+
+  private void testInterSegmentAggregationQueryHelper(String firstQuery, String secondQuery) {
+    // SQL
+    BrokerResponseNative firstBrokerResponseNative = getBrokerResponseForSqlQuery(firstQuery);
+    BrokerResponseNative secondBrokerResponseNative = getBrokerResponseForSqlQuery(secondQuery);
+    ResultTable firstResultTable = firstBrokerResponseNative.getResultTable();
+    ResultTable secondResultTable = secondBrokerResponseNative.getResultTable();
+    DataSchema firstDataSchema = firstResultTable.getDataSchema();
+    DataSchema secondDataSchema = secondResultTable.getDataSchema();
+
+    Assert.assertEquals(firstDataSchema.size(), secondDataSchema.size());
+
+    List<Object[]> firstSetOfRows = firstResultTable.getRows();
+    List<Object[]> secondSetOfRows = secondResultTable.getRows();
+
+    Assert.assertEquals(firstSetOfRows.size(), secondSetOfRows.size());
+
+    for (int i = 0; i < firstSetOfRows.size(); i++) {
+      Object[] firstSetRow = firstSetOfRows.get(i);
+      Object[] secondSetRow = secondSetOfRows.get(i);
+
+      Assert.assertEquals(firstSetRow.length, secondSetRow.length);
+
+      for (int j = 0; j < firstSetRow.length; j++) {
+        //System.out.println("FIRST " + firstSetRow[j] + " SECOND " + secondSetRow[j] + " j " + j);
+        Assert.assertEquals(firstSetRow[j], secondSetRow[j]);
+      }
+    }
+  }
+
+  @Test
+  public void testInterSegment() {
+
+    String query =
+        "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 9999)"
+            + "FROM MyTable WHERE INT_COL < 1000000";
+
+    String nonFilterQuery =
+        "SELECT SUM(INT_COL)"
+            + "FROM MyTable WHERE INT_COL > 9999 AND INT_COL < 1000000";
+
+    testInterSegmentAggregationQueryHelper(query, nonFilterQuery);
+
+    query = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 1234 AND INT_COL < 22000)"
+        + "FROM MyTable";
+
+    nonFilterQuery = "SELECT SUM(

Review comment:
(code style) Suggest reformatting the queries in this test to be more compact.
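Something like this, for example (illustrative only; the same queries, just collapsed):
```java
// Compact form: one string per query, with the space before FROM made explicit
String query = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 9999) FROM MyTable WHERE INT_COL < 1000000";
String nonFilterQuery = "SELECT SUM(INT_COL) FROM MyTable WHERE INT_COL > 9999 AND INT_COL < 1000000";
testInterSegmentAggregationQueryHelper(query, nonFilterQuery);
```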
##########
File path: pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -62,57 +69,25 @@ public AggregationPlanNode(IndexSegment indexSegment, QueryContext queryContext)
   public Operator<IntermediateResultsBlock> run() {
     assert _queryContext.getAggregationFunctions() != null;
 
-    int numTotalDocs = _indexSegment.getSegmentMetadata().getTotalDocs();
-
     AggregationFunction[] aggregationFunctions = _queryContext.getAggregationFunctions();
+    boolean hasFilteredPredicates = _queryContext.isHasFilteredAggregations();
 
-    FilterPlanNode filterPlanNode = new FilterPlanNode(_indexSegment, _queryContext);
-    BaseFilterOperator filterOperator = filterPlanNode.run();
+    Pair<FilterPlanNode, BaseFilterOperator> filterOperatorPair =

Review comment:
What I meant is that we can branch these 2 cases earlier, because the optimizations for regular aggregation don't apply to filtered aggregation (e.g. the extra check on line 246; also, star-tree should not be used for filtered aggregation, which is not checked properly in the current code). It is more readable if we completely split the 2 cases:
```
if (hasFilteredPredicates) {
  return buildOperatorForFilteredAggregations();
} else {
  return buildOperatorForNonFilteredAggregations();
}
```

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/operator/blocks/TransformBlock.java
##########
@@ -43,6 +43,11 @@ public TransformBlock(ProjectionBlock projectionBlock,
     _transformFunctionMap = transformFunctionMap;
   }
 
+  protected TransformBlock(TransformBlock transformBlock) {

Review comment:
Revert this file.

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java
##########
@@ -90,9 +92,11 @@
   // Pre-calculate the aggregation functions and columns for the query so that it can be shared across all the segments
   private AggregationFunction[] _aggregationFunctions;
 
-  private List<Pair<AggregationFunction, FilterContext>> _filteredAggregationFunctions;
+

Review comment:
I see your point, but my concern is that an aggregation function and a filter are logically two independent concepts, and embedding the filter into an aggregation function could cause confusion. If we need to associate extra attributes with an aggregation function, I'd suggest adding a wrapper class instead of implementing a special `AggregationFunction`. Based on the current implementation, I feel `Pair` itself should be enough (we only need to associate the `FilterContext` with the `AggregationFunction`). To maintain the order of the aggregations, we may add pairs with a `null` `FilterContext`.
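Roughly what I have in mind (a sketch only; `sumFunction`, `sumFilter` and `countFunction` are placeholders, not real fields):
```java
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.pinot.common.request.context.FilterContext;
import org.apache.pinot.core.query.aggregation.function.AggregationFunction;

// Keep the two concepts independent: pair each aggregation with its (optional)
// filter instead of hiding the filter inside a special AggregationFunction.
// A null FilterContext marks a regular aggregation, so a single list preserves
// the SELECT-clause order of all aggregations.
List<Pair<AggregationFunction, FilterContext>> aggregations = new ArrayList<>();
aggregations.add(Pair.of(sumFunction, sumFilter)); // SUM(...) FILTER(WHERE ...)
aggregations.add(Pair.of(countFunction, null));    // plain COUNT(...)
```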
##########
File path: pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java
##########
@@ -441,34 +471,54 @@ public QueryContext build() {
    */
   private void generateAggregationFunctions(QueryContext queryContext) {
     List<AggregationFunction> aggregationFunctions = new ArrayList<>();
-    List<Pair<AggregationFunction, FilterContext>> filteredAggregationFunctions = new ArrayList<>();
     Map<FunctionContext, Integer> aggregationFunctionIndexMap = new HashMap<>();
+    Map<Pair<FunctionContext, FilterContext>, Integer> filterExpressionIndexMap = new HashMap<>();
 
     // Add aggregation functions in the SELECT clause
     // NOTE: DO NOT deduplicate the aggregation functions in the SELECT clause because that involves protocol change.
-    List<FunctionContext> aggregationsInSelect = new ArrayList<>();
-    List<Pair<FunctionContext, FilterContext>> filteredAggregations = new ArrayList<>();
+    List<Pair<Pair<FilterContext, ExpressionContext>, FunctionContext>> aggregationsInSelect = new ArrayList<>();
     for (ExpressionContext selectExpression : queryContext._selectExpressions) {
-      getAggregations(selectExpression, aggregationsInSelect, filteredAggregations);
+      getAggregations(selectExpression, aggregationsInSelect);
     }
-    for (FunctionContext function : aggregationsInSelect) {
+    for (Pair<Pair<FilterContext, ExpressionContext>, FunctionContext> pair : aggregationsInSelect) {
+      FunctionContext function = pair.getRight();
       int functionIndex = aggregationFunctions.size();
       AggregationFunction aggregationFunction =
           AggregationFunctionFactory.getAggregationFunction(function, queryContext);
+
+      // Hack: If the left pair is not null, implies a filtered aggregation

Review comment:
Revise this comment? We should not have hacks in production code.

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -154,4 +129,168 @@ private static boolean isFitForDictionaryBasedPlan(AggregationFunction[] aggrega
     }
     return true;
   }
+
+  /**
+   * Build a FilteredAggregationOperator given the parameters.
+   * @param mainPredicateFilterOperator Filter operator corresponding to the main predicate
+   * @param mainTransformOperator Transform operator corresponding to the main predicate
+   * @param aggregationFunctions Aggregation functions in the query
+   * @param numTotalDocs Number of total docs
+   */
+  private BaseOperator<IntermediateResultsBlock> buildOperatorForFilteredAggregations(
+      BaseFilterOperator mainPredicateFilterOperator,
+      TransformOperator mainTransformOperator,
+      AggregationFunction[] aggregationFunctions, int numTotalDocs) {
+    Map<ExpressionContext, Pair<List<AggregationFunction>, TransformOperator>> expressionContextToAggFuncsMap =
+        new HashMap<>();
+    List<AggregationFunction> nonFilteredAggregationFunctions = new ArrayList<>();
+
+    // For each aggregation function, check if the aggregation function is a filtered agg.
+    // If it is, populate the corresponding filter operator and corresponding transform operator
+    for (AggregationFunction aggregationFunction : aggregationFunctions) {
+      if (aggregationFunction instanceof FilterableAggregationFunction) {
+        FilterableAggregationFunction filterableAggregationFunction =
+            (FilterableAggregationFunction) aggregationFunction;
+
+        ExpressionContext currentFilterExpression = filterableAggregationFunction

Review comment:
The `currentFilterExpression` seems redundant. You may directly use `filterContext` as the key.
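i.e. something like this (a sketch; it assumes a getter for the `FilterContext` stored in `FilterableAggregationFunction`, and reuses the imports and variables of the surrounding method):
```java
// Group aggregation functions directly by their FilterContext; no intermediate
// ExpressionContext key is needed. FilterContext already serves as (part of) a
// map key in QueryContext, so it is safe to hash on.
Map<FilterContext, List<AggregationFunction>> filterToAggFuncsMap = new HashMap<>();
for (AggregationFunction aggregationFunction : aggregationFunctions) {
  if (aggregationFunction instanceof FilterableAggregationFunction) {
    FilterContext filterContext =
        ((FilterableAggregationFunction) aggregationFunction).getFilterContext();
    filterToAggFuncsMap.computeIfAbsent(filterContext, k -> new ArrayList<>())
        .add(aggregationFunction);
  }
}
```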
##########
File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/FilterableAggregationFunction.java
##########
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.FilterContext;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+
+/**
+ * Represents a filtered aggregation
+ */
+public class FilterableAggregationFunction implements AggregationFunction<Object, Comparable> {
+  private AggregationFunction<Object, Comparable> _innerAggregationFunction;
+  private ExpressionContext _associatedExpressionContext;
+  private FilterContext _filterContext;
+
+  public FilterableAggregationFunction(AggregationFunction aggregationFunction,
+      ExpressionContext associatedExpressionContext, FilterContext filterContext) {
+    _innerAggregationFunction = aggregationFunction;
+    _associatedExpressionContext = associatedExpressionContext;
+    _filterContext = filterContext;
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return _innerAggregationFunction.getType();
+  }
+
+  @Override
+  public String getColumnName() {
+    return _innerAggregationFunction.getColumnName();
+  }
+
+  @Override
+  public String getResultColumnName() {
+    return _innerAggregationFunction.getResultColumnName();
+  }
+
+  @Override
+  public List<ExpressionContext> getInputExpressions() {
+    return _innerAggregationFunction.getInputExpressions();
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return _innerAggregationFunction.createAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, int maxCapacity) {
+    return _innerAggregationFunction.createGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    _innerAggregationFunction.aggregate(length, aggregationResultHolder, blockValSetMap);
+  }
+
+  @Override
+  public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    _innerAggregationFunction.aggregateGroupBySV(length, groupKeyArray, groupByResultHolder,
+        blockValSetMap);
+  }
+
+  @Override
+  public void aggregateGroupByMV(int length, int[][] groupKeysArray, GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    _innerAggregationFunction.aggregateGroupByMV(length, groupKeysArray, groupByResultHolder,
+        blockValSetMap);
+  }
+
+  @Override
+  public Object extractAggregationResult(AggregationResultHolder aggregationResultHolder) {
+    return _innerAggregationFunction.extractAggregationResult(aggregationResultHolder);
+  }
+
+  @Override
+  public Object extractGroupByResult(GroupByResultHolder groupByResultHolder, int groupKey) {
+    return _innerAggregationFunction.extractGroupByResult(groupByResultHolder, groupKey);
+  }
+
+  @Override
+  public Object merge(Object intermediateResult1, Object intermediateResult2) {
+    return _innerAggregationFunction.merge(intermediateResult1, intermediateResult2);
+  }
+
+  @Override
+  public DataSchema.ColumnDataType getIntermediateResultColumnType() {
+    return _innerAggregationFunction.getIntermediateResultColumnType();
+  }
+
+  @Override
+  public DataSchema.ColumnDataType getFinalResultColumnType() {
+    return _innerAggregationFunction.getFinalResultColumnType();
+  }
+
+  @Override
+  public Comparable extractFinalResult(Object o) {
+    return _innerAggregationFunction.extractFinalResult(o);
+  }
+
+  @Override
+  public String toExplainString() {
+    return null;

Review comment:
^^

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -154,4 +129,168 @@ private static boolean isFitForDictionaryBasedPlan(AggregationFunction[] aggrega
     }
     return true;
  }
+
+  /**
+   * Build a FilteredAggregationOperator given the parameters.
+   * @param mainPredicateFilterOperator Filter operator corresponding to the main predicate
+   * @param mainTransformOperator Transform operator corresponding to the main predicate
+   * @param aggregationFunctions Aggregation functions in the query
+   * @param numTotalDocs Number of total docs
+   */
+  private BaseOperator<IntermediateResultsBlock> buildOperatorForFilteredAggregations(
+      BaseFilterOperator mainPredicateFilterOperator,
+      TransformOperator mainTransformOperator,
+      AggregationFunction[] aggregationFunctions, int numTotalDocs) {
+    Map<ExpressionContext, Pair<List<AggregationFunction>, TransformOperator>> expressionContextToAggFuncsMap =
+        new HashMap<>();
+    List<AggregationFunction> nonFilteredAggregationFunctions = new ArrayList<>();
+
+    // For each aggregation function, check if the aggregation function is a filtered agg.
+    // If it is, populate the corresponding filter operator and corresponding transform operator
+    for (AggregationFunction aggregationFunction : aggregationFunctions) {
+      if (aggregationFunction instanceof FilterableAggregationFunction) {
+        FilterableAggregationFunction filterableAggregationFunction =
+            (FilterableAggregationFunction) aggregationFunction;
+
+        ExpressionContext currentFilterExpression = filterableAggregationFunction
+            .getAssociatedExpressionContext();
+
+        if (expressionContextToAggFuncsMap.get(currentFilterExpression) != null) {
+          expressionContextToAggFuncsMap.get(currentFilterExpression).getLeft().add(aggregationFunction);

Review comment:
(Major) I think the `TransformOperator` cannot be shared among multiple aggregations. Once it iterates over a block, it won't process the same block again for the next aggregation. Let's add some test queries with multiple aggregations on the same filter.
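For instance (a sketch reusing the columns and helper from `FilteredAggregationsTest`; `MAX` stands in for any second aggregation):
```java
// Two aggregations share one FILTER predicate: both must consume every matching
// block, which breaks if a single shared TransformOperator is iterated only once.
String query = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 9999), "
    + "MAX(INT_COL) FILTER(WHERE INT_COL > 9999) FROM MyTable";
String nonFilterQuery = "SELECT SUM(INT_COL), MAX(INT_COL) FROM MyTable WHERE INT_COL > 9999";
testInterSegmentAggregationQueryHelper(query, nonFilterQuery);
```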
-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org