Jackie-Jiang commented on a change in pull request #7584: URL: https://github.com/apache/pinot/pull/7584#discussion_r733222357
########## File path: pinot-core/src/main/java/org/apache/pinot/core/common/ObjectSerDeUtils.java ########## @@ -109,7 +115,12 @@ private ObjectSerDeUtils() { Int2LongMap(23), Long2LongMap(24), Float2LongMap(25), - Double2LongMap(26); + Double2LongMap(26), + IntValueTimePair(27), Review comment: Let's name them `IntLongPair`, `LongLongPair` etc so that they are more generic, and can be reused in the future ########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/ValueTimePair.java ########## @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.segment.local.customobject; + +public abstract class ValueTimePair<V extends Comparable<V>> implements Comparable<ValueTimePair<V>> { + protected V _value; + protected long _time; + + public ValueTimePair(V value, long time) { + _value = value; + _time = time; + } + + public V getValue() { + return _value; + } + + public long getTime() { + return _time; + } + + abstract public byte[] toBytes(); + + @Override + public int compareTo(ValueTimePair<V> o) { Review comment: The comparison logic should be within the aggregation function, so that we can reuse this pair for `FIRST` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/common/ObjectSerDeUtils.java ########## @@ -330,6 +351,101 @@ public MinMaxRangePair deserialize(ByteBuffer byteBuffer) { } }; + public static final ObjectSerDe<? extends ValueTimePair<Integer>> INT_VAL_TIME_PAIR_SER_DE + = new ObjectSerDe<IntValueTimePair>() { Review comment: Please follow the pinot style, and reformat the changes: https://docs.pinot.apache.org/developers/developers-and-contributors/code-setup#setup-ide ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java ########## @@ -156,6 +158,45 @@ public static AggregationFunction getAggregationFunction(FunctionContext functio return new AvgAggregationFunction(firstArgument); case MODE: return new ModeAggregationFunction(arguments); + case LASTWITHTIME: + if (arguments.size() > 1) { + ExpressionContext timeCol = arguments.get(1); + String dataType = arguments.get(2).getIdentifier(); Review comment: The third argument should be a literal. Throw exception if it is not ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/LastDoubleValueWithTimeAggregationFunction.java ########## @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function; + +import java.util.Arrays; +import java.util.List; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.common.ObjectSerDeUtils; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.segment.local.customobject.DoubleValueTimePair; +import org.apache.pinot.segment.local.customobject.ValueTimePair; + +/** + * This function is used for LastWithTime calculations for data column with double type. 
+ * <p>The function can be used as LastWithTime(dataExpression, timeExpression, 'double') + * <p>Following arguments are supported: + * <ul> + * <li>dataExpression: expression that contains the double data column to be calculated last on</li> + * <li>timeExpression: expression that contains the column to be used to decide which data is last, can be any + * Numeric column</li> + * </ul> + */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class LastDoubleValueWithTimeAggregationFunction extends LastWithTimeAggregationFunction<Double> { + + private final static ValueTimePair<Double> DEFAULT_VALUE_TIME_PAIR + = new DoubleValueTimePair(Double.NaN, Long.MIN_VALUE); + + public LastDoubleValueWithTimeAggregationFunction( + ExpressionContext dataCol, + ExpressionContext timeCol, + ObjectSerDeUtils.ObjectSerDe<? extends ValueTimePair<Double>> objectSerDe) { + super(dataCol, timeCol, objectSerDe); + } + + @Override + public List<ExpressionContext> getInputExpressions() { + return Arrays.asList(_expression, _timeCol, ExpressionContext.forLiteral("Long")); + } + + @Override + public ValueTimePair<Double> constructValueTimePair(Double value, long time) { + return new DoubleValueTimePair(value, time); + } + + @Override + public ValueTimePair<Double> getDefaultValueTimePair() { + return DEFAULT_VALUE_TIME_PAIR; + } + + @Override + public void updateResultWithRawData(int length, AggregationResultHolder aggregationResultHolder, + BlockValSet blockValSet, BlockValSet timeValSet) { + ValueTimePair<Double> defaultValueTimePair = getDefaultValueTimePair(); + Double lastData = defaultValueTimePair.getValue(); + long lastTime = defaultValueTimePair.getTime(); + double [] doubleValues = blockValSet.getDoubleValuesSV(); + long[] timeValues = timeValSet.getLongValuesSV(); + for (int i = 0; i < length; i++) { + double data = doubleValues[i]; + long time = timeValues[i]; + if (time >= lastTime) { + lastTime = time; + lastData = data; + } + } + 
setAggregationResult(aggregationResultHolder, lastData, lastTime); + } + + @Override + public void updateGroupResultWithRawDataSv(int length, int[] groupKeyArray, GroupByResultHolder groupByResultHolder, + BlockValSet blockValSet, BlockValSet timeValSet) { + double[] doubleValues = blockValSet.getDoubleValuesSV(); + long[] timeValues = timeValSet.getLongValuesSV(); + for (int i = 0; i < length; i++) { + double data = doubleValues[i]; + long time = timeValues[i]; + setGroupByResult(groupKeyArray[i], groupByResultHolder, data, time); + } + } + + @Override + public void updateGroupResultWithRawDataMv(int length, + int[][] groupKeysArray, + GroupByResultHolder groupByResultHolder, + BlockValSet blockValSet, + BlockValSet timeValSet) { + double[] doubleValues = blockValSet.getDoubleValuesSV(); + long[] timeValues = timeValSet.getLongValuesSV(); + for (int i = 0; i < length; i++) { + double value = doubleValues[i]; + long time = timeValues[i]; + for (int groupKey : groupKeysArray[i]) { + setGroupByResult(groupKey, groupByResultHolder, value, time); + } + } + } + + @Override + public String getResultColumnName() { + return getType().getName().toLowerCase() + "(" + _expression + "," + _timeCol + ", Double)"; Review comment: Same for other places ```suggestion return getType().getName().toLowerCase() + "(" + _expression + "," + _timeCol + ",'DOUBLE')"; ``` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java ########## @@ -156,6 +158,45 @@ public static AggregationFunction getAggregationFunction(FunctionContext functio return new AvgAggregationFunction(firstArgument); case MODE: return new ModeAggregationFunction(arguments); + case LASTWITHTIME: + if (arguments.size() > 1) { + ExpressionContext timeCol = arguments.get(1); + String dataType = arguments.get(2).getIdentifier(); + DataSchema.ColumnDataType columnDataType = DataSchema.ColumnDataType.valueOf(dataType.toUpperCase()); Review comment: We 
should use `FieldSpec.DataType`. You can do `DataType.valueOf(dataType.toUpperCase()).getStoredType()` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java ########## @@ -156,6 +158,45 @@ public static AggregationFunction getAggregationFunction(FunctionContext functio return new AvgAggregationFunction(firstArgument); case MODE: return new ModeAggregationFunction(arguments); + case LASTWITHTIME: + if (arguments.size() > 1) { Review comment: It should be `== 3`? ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java ########## @@ -156,6 +158,45 @@ public static AggregationFunction getAggregationFunction(FunctionContext functio return new AvgAggregationFunction(firstArgument); case MODE: return new ModeAggregationFunction(arguments); + case LASTWITHTIME: + if (arguments.size() > 1) { + ExpressionContext timeCol = arguments.get(1); + String dataType = arguments.get(2).getIdentifier(); + DataSchema.ColumnDataType columnDataType = DataSchema.ColumnDataType.valueOf(dataType.toUpperCase()); + switch (columnDataType) { + case BOOLEAN: + case INT: + return new LastIntValueWithTimeAggregationFunction( + firstArgument, + timeCol, + ObjectSerDeUtils.INT_VAL_TIME_PAIR_SER_DE, Review comment: (nit) No need to pass in the `ObjectSerDe` as they are fixed for each function. 
########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java ########## @@ -156,6 +158,45 @@ public static AggregationFunction getAggregationFunction(FunctionContext functio return new AvgAggregationFunction(firstArgument); case MODE: return new ModeAggregationFunction(arguments); + case LASTWITHTIME: + if (arguments.size() > 1) { + ExpressionContext timeCol = arguments.get(1); + String dataType = arguments.get(2).getIdentifier(); + DataSchema.ColumnDataType columnDataType = DataSchema.ColumnDataType.valueOf(dataType.toUpperCase()); + switch (columnDataType) { + case BOOLEAN: + case INT: + return new LastIntValueWithTimeAggregationFunction( + firstArgument, + timeCol, + ObjectSerDeUtils.INT_VAL_TIME_PAIR_SER_DE, + columnDataType == DataSchema.ColumnDataType.BOOLEAN); + case LONG: + return new LastLongValueWithTimeAggregationFunction( + firstArgument, + timeCol, + ObjectSerDeUtils.LONG_VAL_TIME_PAIR_SER_DE); + case FLOAT: + return new LastFloatValueWithTimeAggregationFunction( + firstArgument, + timeCol, + ObjectSerDeUtils.FLOAT_VAL_TIME_PAIR_SER_DE); + case DOUBLE: + return new LastDoubleValueWithTimeAggregationFunction( + firstArgument, + timeCol, + ObjectSerDeUtils.DOUBLE_VAL_TIME_PAIR_SER_DE); + case STRING: + return new LastStringValueWithTimeAggregationFunction( + firstArgument, + timeCol, + ObjectSerDeUtils.STRING_VAL_TIME_PAIR_SER_DE); + default: + throw new IllegalArgumentException("Unsupported Value Type for LastWithTime Function:" + dataType); + } + } else { + throw new IllegalArgumentException("Two arguments are required for LastWithTime Function."); Review comment: Seems it requires 3 arguments? Can you please also add the expected arguments in the error message? 
########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/ValueTimePair.java ########## @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.customobject; + +public abstract class ValueTimePair<V extends Comparable<V>> implements Comparable<ValueTimePair<V>> { Review comment: Let's rename it to `ValueLongPair` so that it's more generic and can be reused in the future ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/LastWithTimeAggregationFunction.java ########## @@ -0,0 +1,219 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function; + +import java.util.Map; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.common.ObjectSerDeUtils; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder; +import org.apache.pinot.segment.local.customobject.ValueTimePair; +import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.spi.data.FieldSpec.DataType; + +/** + * This function is used for LastWithTime calculations. 
+ * <p>The function can be used as LastWithTime(dataExpression, timeExpression, 'dataType') + * <p>Following arguments are supported: + * <ul> + * <li>dataExpression: expression that contains the column to be calculated last on</li> + * <li>timeExpression: expression that contains the column to be used to decide which data is last, can be any + * Numeric column</li> + * <li>dataType: the data type of data column</li> + * </ul> + */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public abstract class LastWithTimeAggregationFunction<V extends Comparable<V>> + extends BaseSingleInputAggregationFunction<ValueTimePair<V>, V> { + protected final ExpressionContext _timeCol; + private final ObjectSerDeUtils.ObjectSerDe<? extends ValueTimePair<V>> _objectSerDe; + + public LastWithTimeAggregationFunction(ExpressionContext dataCol, + ExpressionContext timeCol, + ObjectSerDeUtils.ObjectSerDe<? extends ValueTimePair<V>> objectSerDe) { + super(dataCol); + _timeCol = timeCol; + _objectSerDe = objectSerDe; + } + + public abstract ValueTimePair<V> constructValueTimePair(V value, long time); + + public abstract ValueTimePair<V> getDefaultValueTimePair(); + + public abstract void updateResultWithRawData(int length, AggregationResultHolder aggregationResultHolder, Review comment: (nit) Suggest renaming it to `aggregate` for readability. Same for group-by ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/LastDoubleValueWithTimeAggregationFunction.java ########## @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function; + +import java.util.Arrays; +import java.util.List; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.common.ObjectSerDeUtils; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.segment.local.customobject.DoubleValueTimePair; +import org.apache.pinot.segment.local.customobject.ValueTimePair; + +/** + * This function is used for LastWithTime calculations for data column with double type. + * <p>The function can be used as LastWithTime(dataExpression, timeExpression, 'double') + * <p>Following arguments are supported: + * <ul> + * <li>dataExpression: expression that contains the double data column to be calculated last on</li> + * <li>timeExpression: expression that contains the column to be used to decide which data is last, can be any + * Numeric column</li> + * </ul> + */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class LastDoubleValueWithTimeAggregationFunction extends LastWithTimeAggregationFunction<Double> { + + private final static ValueTimePair<Double> DEFAULT_VALUE_TIME_PAIR + = new DoubleValueTimePair(Double.NaN, Long.MIN_VALUE); + + public LastDoubleValueWithTimeAggregationFunction( + ExpressionContext dataCol, + ExpressionContext timeCol, + ObjectSerDeUtils.ObjectSerDe<? 
extends ValueTimePair<Double>> objectSerDe) { + super(dataCol, timeCol, objectSerDe); + } + + @Override + public List<ExpressionContext> getInputExpressions() { + return Arrays.asList(_expression, _timeCol, ExpressionContext.forLiteral("Long")); + } + + @Override + public ValueTimePair<Double> constructValueTimePair(Double value, long time) { + return new DoubleValueTimePair(value, time); + } + + @Override + public ValueTimePair<Double> getDefaultValueTimePair() { + return DEFAULT_VALUE_TIME_PAIR; + } + + @Override + public void updateResultWithRawData(int length, AggregationResultHolder aggregationResultHolder, + BlockValSet blockValSet, BlockValSet timeValSet) { + ValueTimePair<Double> defaultValueTimePair = getDefaultValueTimePair(); + Double lastData = defaultValueTimePair.getValue(); + long lastTime = defaultValueTimePair.getTime(); + double [] doubleValues = blockValSet.getDoubleValuesSV(); + long[] timeValues = timeValSet.getLongValuesSV(); + for (int i = 0; i < length; i++) { + double data = doubleValues[i]; + long time = timeValues[i]; + if (time >= lastTime) { + lastTime = time; + lastData = data; + } + } + setAggregationResult(aggregationResultHolder, lastData, lastTime); + } + + @Override + public void updateGroupResultWithRawDataSv(int length, int[] groupKeyArray, GroupByResultHolder groupByResultHolder, + BlockValSet blockValSet, BlockValSet timeValSet) { + double[] doubleValues = blockValSet.getDoubleValuesSV(); + long[] timeValues = timeValSet.getLongValuesSV(); + for (int i = 0; i < length; i++) { + double data = doubleValues[i]; + long time = timeValues[i]; + setGroupByResult(groupKeyArray[i], groupByResultHolder, data, time); + } + } + + @Override + public void updateGroupResultWithRawDataMv(int length, + int[][] groupKeysArray, + GroupByResultHolder groupByResultHolder, + BlockValSet blockValSet, + BlockValSet timeValSet) { + double[] doubleValues = blockValSet.getDoubleValuesSV(); + long[] timeValues = timeValSet.getLongValuesSV(); + for (int i 
= 0; i < length; i++) { + double value = doubleValues[i]; + long time = timeValues[i]; + for (int groupKey : groupKeysArray[i]) { + setGroupByResult(groupKey, groupByResultHolder, value, time); + } + } + } + + @Override + public String getResultColumnName() { Review comment: Also need to override `getColumnName()` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/LastDoubleValueWithTimeAggregationFunction.java ########## @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.core.query.aggregation.function; + +import java.util.Arrays; +import java.util.List; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.common.ObjectSerDeUtils; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.segment.local.customobject.DoubleValueTimePair; +import org.apache.pinot.segment.local.customobject.ValueTimePair; + +/** + * This function is used for LastWithTime calculations for data column with double type. + * <p>The function can be used as LastWithTime(dataExpression, timeExpression, 'double') + * <p>Following arguments are supported: + * <ul> + * <li>dataExpression: expression that contains the double data column to be calculated last on</li> + * <li>timeExpression: expression that contains the column to be used to decide which data is last, can be any + * Numeric column</li> + * </ul> + */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class LastDoubleValueWithTimeAggregationFunction extends LastWithTimeAggregationFunction<Double> { + + private final static ValueTimePair<Double> DEFAULT_VALUE_TIME_PAIR + = new DoubleValueTimePair(Double.NaN, Long.MIN_VALUE); + + public LastDoubleValueWithTimeAggregationFunction( + ExpressionContext dataCol, + ExpressionContext timeCol, + ObjectSerDeUtils.ObjectSerDe<? extends ValueTimePair<Double>> objectSerDe) { + super(dataCol, timeCol, objectSerDe); + } + + @Override + public List<ExpressionContext> getInputExpressions() { + return Arrays.asList(_expression, _timeCol, ExpressionContext.forLiteral("Long")); Review comment: Don't read the literal as input, it has overhead. Just keep `_expression` and `_timeCol` -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org