Jackie-Jiang commented on a change in pull request #7781: URL: https://github.com/apache/pinot/pull/7781#discussion_r759714618
########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java ########## @@ -201,6 +202,14 @@ public int getOffset() { return _offset; } + public boolean isAggregateGapfill() { Review comment: Let's move this method to the `QueryContextUtils` as a util method ########## File path: pinot-core/src/main/java/org/apache/pinot/core/util/GapfillUtils.java ########## @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.util; + +import java.io.Serializable; +import org.apache.commons.lang3.StringUtils; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.FunctionContext; +import org.apache.pinot.common.utils.DataSchema; + +/** + * Util class to encapsulate all utilites required for gapfill. 
+ */ +public class GapfillUtils { + private static final String POST_AGGREGATE_GAP_FILL = "postaggregategapfill"; + private static final String FILL = "fill"; + + private GapfillUtils() { + } + + public static ExpressionContext stripGapfill(ExpressionContext expression) { + if (expression.getType() != ExpressionContext.Type.FUNCTION) { + return expression; + } + + FunctionContext function = expression.getFunction(); + String functionName = canonicalizeFunctionName(function.getFunctionName()); + if (functionName.equals(POST_AGGREGATE_GAP_FILL) || functionName.equals(FILL)) { + return function.getArguments().get(0); + } + return expression; + } + + public static boolean isPostAggregateGapfill(ExpressionContext expressionContext) { + if (expressionContext.getType() != ExpressionContext.Type.FUNCTION) { + return false; + } + + return POST_AGGREGATE_GAP_FILL.equals(canonicalizeFunctionName(expressionContext.getFunction().getFunctionName())); + } + + public static boolean isFill(ExpressionContext expressionContext) { + if (expressionContext.getType() != ExpressionContext.Type.FUNCTION) { + return false; + } + + return FILL.equals(canonicalizeFunctionName(expressionContext.getFunction().getFunctionName())); + } + + static public enum FillType { + FILL_DEFAULT_VALUE, + FILL_PREVIOUS_VALUE, + } + + /** + * The default gapfill value for each column type. 
+ */ + static public Serializable getDefaultValue(DataSchema.ColumnDataType dataType) { + switch (dataType) { + // Single-value column + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case BOOLEAN: + case TIMESTAMP: + return dataType.convertAndFormat(0); + case STRING: + case JSON: + case BYTES: + return ""; + case INT_ARRAY: + return new int[0]; + case LONG_ARRAY: + return new long[0]; + case FLOAT_ARRAY: + return new float[0]; + case DOUBLE_ARRAY: + return new double[0]; + case STRING_ARRAY: + case TIMESTAMP_ARRAY: + return new String[0]; + case BOOLEAN_ARRAY: + return new boolean[0]; + case BYTES_ARRAY: + return new byte[0][0]; + default: + throw new IllegalStateException(String.format("Cannot provide the default value for the type: %s", dataType)); + } + } + + public static boolean isTimeBucketTimeFunction(ExpressionContext expressionContext) { Review comment: I feel this validation is not required and could reject some valid queries. The first argument doesn't have to be a time bucket function. It can be a time column (e.g. `hoursSinceEpoch`) or some other udf (e.g. `round`) ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/reduce/PostAggregationHandler.java ########## @@ -67,7 +68,8 @@ public PostAggregationHandler(QueryContext queryContext, DataSchema dataSchema) String[] columnNames = new String[numSelectExpressions]; ColumnDataType[] columnDataTypes = new ColumnDataType[numSelectExpressions]; for (int i = 0; i < numSelectExpressions; i++) { - ValueExtractor valueExtractor = getValueExtractor(selectExpressions.get(i)); + ValueExtractor valueExtractor + = getValueExtractor(selectExpressions.get(i)); Review comment: (minor) revert ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/reduce/GapFillGroupByDataTableReducer.java ########## @@ -0,0 +1,494 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.reduce; + +import com.google.common.base.Preconditions; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; +import org.apache.pinot.common.exception.QueryException; +import org.apache.pinot.common.metrics.BrokerGauge; +import org.apache.pinot.common.metrics.BrokerMeter; +import org.apache.pinot.common.metrics.BrokerMetrics; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.FilterContext; +import org.apache.pinot.common.request.context.OrderByExpressionContext; +import org.apache.pinot.common.response.broker.BrokerResponseNative; +import org.apache.pinot.common.response.broker.QueryProcessingException; +import org.apache.pinot.common.response.broker.ResultTable; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.common.utils.DataTable; +import org.apache.pinot.core.data.table.ConcurrentIndexedTable; 
+import org.apache.pinot.core.data.table.IndexedTable; +import org.apache.pinot.core.data.table.Key; +import org.apache.pinot.core.data.table.Record; +import org.apache.pinot.core.data.table.SimpleIndexedTable; +import org.apache.pinot.core.data.table.UnboundedConcurrentIndexedTable; +import org.apache.pinot.core.operator.combine.GroupByOrderByCombineOperator; +import org.apache.pinot.core.query.aggregation.function.AggregationFunction; +import org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.core.transport.ServerRoutingInstance; +import org.apache.pinot.core.util.GapfillUtils; +import org.apache.pinot.core.util.GroupByUtils; +import org.apache.pinot.core.util.trace.TraceCallable; +import org.apache.pinot.spi.data.DateTimeFormatSpec; +import org.apache.pinot.spi.data.DateTimeGranularitySpec; + + +/** + * Helper class to reduce data tables and set group by results into the BrokerResponseNative + */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class GapFillGroupByDataTableReducer implements DataTableReducer { + private static final int MIN_DATA_TABLES_FOR_CONCURRENT_REDUCE = 2; // TBD, find a better value. 
+ + private final QueryContext _queryContext; + private final AggregationFunction[] _aggregationFunctions; + private final int _numAggregationFunctions; + private final List<ExpressionContext> _groupByExpressions; + private final int _numGroupByExpressions; + private final int _numColumns; + private final DateTimeGranularitySpec _dateTimeGranularity; + private final DateTimeFormatSpec _dateTimeFormatter; + private final long _startMs; + private final long _endMs; + private final Set<Key> _groupByKeys; + private final Map<Key, Object[]> _previousByGroupKey; + private final int _numOfGroupByKeys; + private final List<Integer> _groupByKeyIndexes; + private final boolean [] _isGroupBySelections; + private int _timeBucketIndex = -1; + + GapFillGroupByDataTableReducer(QueryContext queryContext) { + Preconditions.checkArgument( + queryContext.getBrokerRequest().getPinotQuery() != null, "GapFill can only be applied to sql query"); + _queryContext = queryContext; + _aggregationFunctions = queryContext.getAggregationFunctions(); + assert _aggregationFunctions != null; + _numAggregationFunctions = _aggregationFunctions.length; + _groupByExpressions = queryContext.getGroupByExpressions(); + assert _groupByExpressions != null; + _numGroupByExpressions = _groupByExpressions.size(); + _numColumns = _numAggregationFunctions + _numGroupByExpressions; + + ExpressionContext gapFillSelection = null; + for (ExpressionContext expressionContext : _queryContext.getSelectExpressions()) { + if (GapfillUtils.isPostAggregateGapfill(expressionContext)) { + gapFillSelection = expressionContext; + break; + } + } + + List<ExpressionContext> args = gapFillSelection.getFunction().getArguments(); + Preconditions.checkArgument( + args.size() == 5, "PostAggregateGapFill does not have correct number of arguments."); + Preconditions.checkArgument( + args.get(1).getLiteral() != null, "The second argument of PostAggregateGapFill should be TimeFormatter."); + Preconditions.checkArgument( + 
args.get(2).getLiteral() != null, "The third argument of PostAggregateGapFill should be start time."); + Preconditions.checkArgument( + args.get(3).getLiteral() != null, "The fourth argument of PostAggregateGapFill should be end time."); + Preconditions.checkArgument( + args.get(4).getLiteral() != null, "The fifth argument of PostAggregateGapFill should be time bucket size."); + + Preconditions.checkArgument( + GapfillUtils.isTimeBucketTimeFunction(args.get(0)), + "The first argument of PostAggregateGapFill should be timeBucket Function."); + + boolean orderByTimeBucket = false; + if (_queryContext.getOrderByExpressions() != null) { + for (OrderByExpressionContext expressionContext : _queryContext.getOrderByExpressions()) { + if (expressionContext.getExpression().equals(gapFillSelection)) { Review comment: Should we allow ordering on the first argument as well? E.g. `select postaggregategapfill(timeCol, ...) ... order by timeCol ...` We should also check if it is the first order by expression, and in ascending order ########## File path: pinot-core/src/test/java/org/apache/pinot/queries/PostAggregationGapfillQueriesTest.java ########## @@ -0,0 +1,592 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.queries; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; +import org.apache.commons.io.FileUtils; +import org.apache.pinot.common.response.broker.BrokerResponseNative; +import org.apache.pinot.common.response.broker.ResultTable; +import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; +import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl; +import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader; +import org.apache.pinot.segment.spi.ImmutableSegment; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.data.DateTimeFormatSpec; +import org.apache.pinot.spi.data.DateTimeGranularitySpec; +import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.data.readers.GenericRow; +import org.apache.pinot.spi.utils.ReadMode; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.testng.Assert; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + + +/** + * Queries test for PostAggregationGapfill queries. 
+ */ +@SuppressWarnings("rawtypes") +public class PostAggregationGapfillQueriesTest extends BaseQueriesTest { + private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "PostAggregationGapfillQueriesTest"); + private static final String RAW_TABLE_NAME = "parkingData"; + private static final String SEGMENT_NAME = "testSegment"; + private static final Random RANDOM = new Random(); + + private static final int NUM_LOTS = 4; + + private static final String IS_OCCUPIED_COLUMN = "isOccupied"; + private static final String LOT_ID_COLUMN = "lotId"; + private static final String EVENT_TIME_COLUMN = "eventTime"; + private static final Schema SCHEMA = new Schema.SchemaBuilder() + .addSingleValueDimension(IS_OCCUPIED_COLUMN, DataType.BOOLEAN) + .addSingleValueDimension(LOT_ID_COLUMN, DataType.STRING) + .addSingleValueDimension(EVENT_TIME_COLUMN, DataType.LONG) + .setPrimaryKeyColumns(Arrays.asList(LOT_ID_COLUMN, EVENT_TIME_COLUMN)) + .build(); + private static final TableConfig TABLE_CONFIG = new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME) + .build(); + + private IndexSegment _indexSegment; + private List<IndexSegment> _indexSegments; + + @Override + protected String getFilter() { + // NOTE: Use a match all filter to switch between DictionaryBasedAggregationOperator and AggregationOperator + return " WHERE eventTime >= 0"; + } + + @Override + protected IndexSegment getIndexSegment() { + return _indexSegment; + } + + @Override + protected List<IndexSegment> getIndexSegments() { + return _indexSegments; + } + + @BeforeClass + public void setUp() + throws Exception { + FileUtils.deleteDirectory(INDEX_DIR); + + long current = 1636286400000L; //November 7, 2021 12:00:00 PM + int duplicates = 16; + int interval = 1000 * 900; // 15 minutes + long start = current - duplicates * 2 * interval; //November 7, 2021 4:00:00 AM + + List<GenericRow> records = new ArrayList<>(NUM_LOTS * 2); + for (int i = 0; i < NUM_LOTS; i++) { + for (int j = 0; j < 
duplicates; j++) { + if (j == 4 || j == 5 || j == 6 || j == 7 || j == 10 || j == 11) { + continue; + } + long parkingTime = start + interval * 2 * j + RANDOM.nextInt(interval); + long departingTime = j == 3 ? start + interval * (2 * j + 6) + RANDOM.nextInt(interval) : start + + interval * (2 * j + 1) + RANDOM.nextInt(interval); + + GenericRow parkingRow = new GenericRow(); + parkingRow.putValue(EVENT_TIME_COLUMN, parkingTime); + parkingRow.putValue(LOT_ID_COLUMN, "LotId_" + String.valueOf(i)); + parkingRow.putValue(IS_OCCUPIED_COLUMN, true); + records.add(parkingRow); + + GenericRow departingRow = new GenericRow(); + departingRow.putValue(EVENT_TIME_COLUMN, departingTime); + departingRow.putValue(LOT_ID_COLUMN, "LotId_" + String.valueOf(i)); + departingRow.putValue(IS_OCCUPIED_COLUMN, false); + records.add(departingRow); + } + } + + SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(TABLE_CONFIG, SCHEMA); + segmentGeneratorConfig.setTableName(RAW_TABLE_NAME); + segmentGeneratorConfig.setSegmentName(SEGMENT_NAME); + segmentGeneratorConfig.setOutDir(INDEX_DIR.getPath()); + + SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl(); + driver.init(segmentGeneratorConfig, new GenericRowRecordReader(records)); + driver.build(); + + ImmutableSegment immutableSegment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), ReadMode.mmap); + _indexSegment = immutableSegment; + _indexSegments = Arrays.asList(immutableSegment); + } + + @Test + public void datetimeconvertGapfillTest() { + String dataTimeConvertQuery = "SELECT " + + "DATETIMECONVERT(eventTime, '1:MILLISECONDS:EPOCH', " + + "'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:HOURS') AS time_col, " + + "lotId, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN')" + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "ORDER BY 1 " + + "LIMIT 200"; + + BrokerResponseNative 
dateTimeConvertBrokerResponse = getBrokerResponseForSqlQuery(dataTimeConvertQuery); + + ResultTable dateTimeConvertResultTable = dateTimeConvertBrokerResponse.getResultTable(); + Assert.assertEquals(dateTimeConvertResultTable.getRows().size(), 24); + + String gapfillQuery = "SELECT " + + "PostAggregateGapFill(DATETIMECONVERT(eventTime, '1:MILLISECONDS:EPOCH', " + + "'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:HOURS'), " + + "'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', " + + "'2021-11-07 3:00:00.000', '2021-11-07 12:00:00.000', '1:HOURS') AS time_col, " + + "lotId, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_PREVIOUS_VALUE') as status1, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_DEFAULT_VALUE') as status2, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN') as status3 " + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "ORDER BY 1 " + + "LIMIT 200"; + + DateTimeFormatSpec dateTimeFormatter + = new DateTimeFormatSpec("1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS"); + DateTimeGranularitySpec dateTimeGranularity = new DateTimeGranularitySpec("1:HOURS"); + + BrokerResponseNative gapfillBrokerResponse = getBrokerResponseForSqlQuery(gapfillQuery); + + ResultTable gapFillResultTable = gapfillBrokerResponse.getResultTable(); + Assert.assertEquals(gapFillResultTable.getRows().size(), 32); + List<Object[]> gapFillRows = gapFillResultTable.getRows(); + long start = dateTimeFormatter.fromFormatToMillis("2021-11-07 03:00:00.000"); + for (int i = 0; i < 32; i += 4) { + String firstTimeCol = (String) gapFillRows.get(i)[0]; + long timeStamp = dateTimeFormatter.fromFormatToMillis(firstTimeCol); + Assert.assertEquals(timeStamp, start); + Set<String> lots = new HashSet<>(); + lots.add((String) gapFillRows.get(i)[1]); + for (int j = 1; j < 4; j++) { + Assert.assertEquals(gapFillRows.get(i)[0], gapFillRows.get(i + j)[0]); 
+ Assert.assertFalse(lots.contains(gapFillRows.get(i + j)[1])); + lots.add((String) gapFillRows.get(i + j)[1]); + } + start += dateTimeGranularity.granularityToMillis(); + } + } + + @Test + public void toEpochHoursGapfillTest() { + String dataTimeConvertQuery = "SELECT " + + "ToEpochHours(eventTime) AS time_col, " + + "lotId, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN')" + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "ORDER BY 1 " + + "LIMIT 200"; + + BrokerResponseNative dateTimeConvertBrokerResponse = getBrokerResponseForSqlQuery(dataTimeConvertQuery); + + ResultTable dateTimeConvertResultTable = dateTimeConvertBrokerResponse.getResultTable(); + Assert.assertEquals(dateTimeConvertResultTable.getRows().size(), 24); + + String gapfillQuery = "SELECT " + + "PostAggregateGapFill(ToEpochHours(eventTime), '1:HOURS:EPOCH', " + + "'454515', '454524', '1:HOURS') AS time_col, " + + "lotId, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_PREVIOUS_VALUE') as status1, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_DEFAULT_VALUE') as status2, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN') as status3 " + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "ORDER BY 1 " + + "LIMIT 200"; + + DateTimeFormatSpec dateTimeFormatter = new DateTimeFormatSpec("1:HOURS:EPOCH"); + DateTimeGranularitySpec dateTimeGranularity = new DateTimeGranularitySpec("1:HOURS"); + + BrokerResponseNative gapfillBrokerResponse = getBrokerResponseForSqlQuery(gapfillQuery); + + ResultTable gapFillResultTable = gapfillBrokerResponse.getResultTable(); + Assert.assertEquals(gapFillResultTable.getRows().size(), 32); + List<Object[]> gapFillRows = gapFillResultTable.getRows(); + long start = dateTimeFormatter.fromFormatToMillis("454515"); + for (int i = 0; i < 32; i += 4) { + Long firstTimeCol = (Long) 
gapFillRows.get(i)[0]; + long timeStamp = dateTimeFormatter.fromFormatToMillis(firstTimeCol.toString()); + Assert.assertEquals(timeStamp, start); + Set<String> lots = new HashSet<>(); + lots.add((String) gapFillRows.get(i)[1]); + for (int j = 1; j < 4; j++) { + Assert.assertEquals(gapFillRows.get(i)[0], gapFillRows.get(i + j)[0]); + Assert.assertFalse(lots.contains(gapFillRows.get(i + j)[1])); + lots.add((String) gapFillRows.get(i + j)[1]); + } + start += dateTimeGranularity.granularityToMillis(); + } + } + + @Test + public void toEpochMinutesRoundedHoursGapfillTest() { + String dataTimeConvertQuery = "SELECT " + + "ToEpochMinutesRounded(eventTime, 60) AS time_col, " + + "lotId, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN')" + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "ORDER BY 1 " + + "LIMIT 200"; + + BrokerResponseNative dateTimeConvertBrokerResponse = getBrokerResponseForSqlQuery(dataTimeConvertQuery); + + ResultTable dateTimeConvertResultTable = dateTimeConvertBrokerResponse.getResultTable(); + Assert.assertEquals(dateTimeConvertResultTable.getRows().size(), 24); + + String gapfillQuery = "SELECT " + + "PostAggregateGapFill(ToEpochMinutesRounded(eventTime, 60), '1:HOURS:EPOCH', " + + "'454515', '454524', '1:HOURS') AS time_col, " + + "lotId, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_PREVIOUS_VALUE') as status1, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_DEFAULT_VALUE') as status2, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN') as status3 " + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "ORDER BY 1 " + + "LIMIT 200"; + + DateTimeFormatSpec dateTimeFormatter = new DateTimeFormatSpec("1:HOURS:EPOCH"); + DateTimeGranularitySpec dateTimeGranularity = new DateTimeGranularitySpec("1:HOURS"); + + BrokerResponseNative gapfillBrokerResponse = 
getBrokerResponseForSqlQuery(gapfillQuery); + + ResultTable gapFillResultTable = gapfillBrokerResponse.getResultTable(); + Assert.assertEquals(gapFillResultTable.getRows().size(), 32); + List<Object[]> gapFillRows = gapFillResultTable.getRows(); + long start = dateTimeFormatter.fromFormatToMillis("454515"); + for (int i = 0; i < 32; i += 4) { + Long firstTimeCol = (Long) gapFillRows.get(i)[0]; + long timeStamp = dateTimeFormatter.fromFormatToMillis(firstTimeCol.toString()); + Assert.assertEquals(timeStamp, start); + Set<String> lots = new HashSet<>(); + lots.add((String) gapFillRows.get(i)[1]); + for (int j = 1; j < 4; j++) { + Assert.assertEquals(gapFillRows.get(i)[0], gapFillRows.get(i + j)[0]); + Assert.assertFalse(lots.contains(gapFillRows.get(i + j)[1])); + lots.add((String) gapFillRows.get(i + j)[1]); + } + start += dateTimeGranularity.granularityToMillis(); + } + } + + @Test + public void toEpochMinutesBucketHoursGapfillTest() { + String dataTimeConvertQuery = "SELECT " + + "ToEpochMinutesBucket(eventTime, 60) AS time_col, " + + "lotId, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN')" + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "ORDER BY 1 " + + "LIMIT 200"; + + BrokerResponseNative dateTimeConvertBrokerResponse = getBrokerResponseForSqlQuery(dataTimeConvertQuery); + + ResultTable dateTimeConvertResultTable = dateTimeConvertBrokerResponse.getResultTable(); + Assert.assertEquals(dateTimeConvertResultTable.getRows().size(), 24); + + String gapfillQuery = "SELECT " + + "PostAggregateGapFill(ToEpochMinutesBucket(eventTime, 60), '1:HOURS:EPOCH', " + + "'454515', '454524', '1:HOURS') AS time_col, " + + "lotId, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_PREVIOUS_VALUE') as status1, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_DEFAULT_VALUE') as status2, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN') as status3 " + + "FROM parkingData 
" + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "ORDER BY 1 " + + "LIMIT 200"; + + DateTimeFormatSpec dateTimeFormatter = new DateTimeFormatSpec("1:HOURS:EPOCH"); + DateTimeGranularitySpec dateTimeGranularity = new DateTimeGranularitySpec("1:HOURS"); + + BrokerResponseNative gapfillBrokerResponse = getBrokerResponseForSqlQuery(gapfillQuery); + + ResultTable gapFillResultTable = gapfillBrokerResponse.getResultTable(); + Assert.assertEquals(gapFillResultTable.getRows().size(), 32); + List<Object[]> gapFillRows = gapFillResultTable.getRows(); + long start = dateTimeFormatter.fromFormatToMillis("454515"); + for (int i = 0; i < 32; i += 4) { + Long firstTimeCol = (Long) gapFillRows.get(i)[0]; + long timeStamp = dateTimeFormatter.fromFormatToMillis(firstTimeCol.toString()); + Assert.assertEquals(timeStamp, start); + Set<String> lots = new HashSet<>(); + lots.add((String) gapFillRows.get(i)[1]); + for (int j = 1; j < 4; j++) { + Assert.assertEquals(gapFillRows.get(i)[0], gapFillRows.get(i + j)[0]); + Assert.assertFalse(lots.contains(gapFillRows.get(i + j)[1])); + lots.add((String) gapFillRows.get(i + j)[1]); + } + start += dateTimeGranularity.granularityToMillis(); + } + } + + @Test + public void dateTruncHoursGapfillTest() { + String dataTimeConvertQuery = "SELECT " + + "DATETRUNC('hour', eventTime, 'milliseconds') AS time_col, " + + "lotId, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN')" + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "ORDER BY 1 " + + "LIMIT 200"; + + BrokerResponseNative dateTimeConvertBrokerResponse = getBrokerResponseForSqlQuery(dataTimeConvertQuery); + + ResultTable dateTimeConvertResultTable = dateTimeConvertBrokerResponse.getResultTable(); + Assert.assertEquals(dateTimeConvertResultTable.getRows().size(), 24); + + String gapfillQuery = "SELECT " + + "PostAggregateGapFill(DATETRUNC('hour', eventTime, 'milliseconds'), 
'1:HOURS:EPOCH', " + + "'454515', '454524', '1:HOURS') AS time_col, " + + "lotId, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_PREVIOUS_VALUE') as status1, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_DEFAULT_VALUE') as status2, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN') as status3 " + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "ORDER BY 1 " + + "LIMIT 200"; + + DateTimeFormatSpec dateTimeFormatter = new DateTimeFormatSpec("1:HOURS:EPOCH"); + DateTimeGranularitySpec dateTimeGranularity = new DateTimeGranularitySpec("1:HOURS"); + + BrokerResponseNative gapfillBrokerResponse = getBrokerResponseForSqlQuery(gapfillQuery); + + ResultTable gapFillResultTable = gapfillBrokerResponse.getResultTable(); + Assert.assertEquals(gapFillResultTable.getRows().size(), 32); + List<Object[]> gapFillRows = gapFillResultTable.getRows(); + long start = dateTimeFormatter.fromFormatToMillis("454515"); + for (int i = 0; i < 32; i += 4) { + Long firstTimeCol = (Long) gapFillRows.get(i)[0]; + long timeStamp = dateTimeFormatter.fromFormatToMillis(firstTimeCol.toString()); + Assert.assertEquals(timeStamp, start); + Set<String> lots = new HashSet<>(); + lots.add((String) gapFillRows.get(i)[1]); + for (int j = 1; j < 4; j++) { + Assert.assertEquals(gapFillRows.get(i)[0], gapFillRows.get(i + j)[0]); + Assert.assertFalse(lots.contains(gapFillRows.get(i + j)[1])); + lots.add((String) gapFillRows.get(i + j)[1]); + } + start += dateTimeGranularity.granularityToMillis(); + } + } + + @Test + public void datetimeconvertGapfillTestWithoutTimeBucketOrdering() { + try { + String gapfillQuery = "SELECT " + + "PostAggregateGapFill(DATETIMECONVERT(eventTime, '1:MILLISECONDS:EPOCH', " + + "'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:HOURS'), " + + "'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', " + + "'2021-11-07 3:00:00.000', '2021-11-07 
12:00:00.000', '1:HOURS') AS time_col, " + + "lotId, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_PREVIOUS_VALUE') as status1, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_DEFAULT_VALUE') as status2, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN') as status3 " + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "LIMIT 200"; + + getBrokerResponseForSqlQuery(gapfillQuery); + Assert.fail(); + } catch (IllegalArgumentException e) { + Assert.assertEquals(e.getMessage(), "PostAggregateGapFill does not work if the time bucket is not ordered."); + } + } + + @Test + public void datetimeconvertGapfillTestWithHavingClause() { + String dataTimeConvertQuery = "SELECT " + + "DATETIMECONVERT(eventTime, '1:MILLISECONDS:EPOCH', " + + "'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:HOURS') AS time_col, " + + "lotId, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN')" + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "HAVING lotId IN ('LotId_0', 'LotId_1', 'LotId_2') " + + "ORDER BY 1 " + + "LIMIT 200"; + + BrokerResponseNative dateTimeConvertBrokerResponse = getBrokerResponseForSqlQuery(dataTimeConvertQuery); + + ResultTable dateTimeConvertResultTable = dateTimeConvertBrokerResponse.getResultTable(); + Assert.assertEquals(dateTimeConvertResultTable.getRows().size(), 18); + + String gapfillQuery = "SELECT " + + "PostAggregateGapFill(DATETIMECONVERT(eventTime, '1:MILLISECONDS:EPOCH', " + + "'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', '1:HOURS'), " + + "'1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS', " + + "'2021-11-07 3:00:00.000', '2021-11-07 12:00:00.000', '1:HOURS') AS time_col, " + + "lotId, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 'FILL_PREVIOUS_VALUE') as status1, " + + "FILL(lastWithTime(isOccupied, eventTime, 'BOOLEAN'), 
'FILL_DEFAULT_VALUE') as status2, " + + "lastWithTime(isOccupied, eventTime, 'BOOLEAN') as status3 " + + "FROM parkingData " + + "WHERE eventTime >= 1635940800000 AND eventTime <= 1636286400000 " + + "GROUP BY 1, 2 " + + "HAVING lotId IN ('LotId_0', 'LotId_1', 'LotId_2') " Review comment: Having should be applied to post-aggregations (e.g. `status1`) instead of columns. This having can be rewritten as a where clause. Currently having is applied before filling the values, so I believe it could cause unexpected results. Ideally we should first fill the values and then apply the having filter. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For additional commands, e-mail: commits-help@pinot.apache.org