yashmayya commented on code in PR #16836: URL: https://github.com/apache/pinot/pull/16836#discussion_r2456644376
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/AvgMVValueAggregator.java: ########## @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.aggregator; + +import javax.annotation.Nullable; +import org.apache.pinot.segment.local.customobject.AvgPair; +import org.apache.pinot.segment.local.utils.CustomSerDeUtils; +import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.spi.data.FieldSpec.DataType; + + +/** + * Value aggregator for AVGMV aggregation function. + * This aggregator handles multi-value columns by computing the average across all values in all arrays. 
+ */ +public class AvgMVValueAggregator implements ValueAggregator<Object, AvgPair> { + public static final DataType AGGREGATED_VALUE_TYPE = DataType.BYTES; + + @Override + public AggregationFunctionType getAggregationType() { + return AggregationFunctionType.AVGMV; + } + + @Override + public DataType getAggregatedValueType() { + return AGGREGATED_VALUE_TYPE; + } + + @Override + public AvgPair getInitialAggregatedValue(@Nullable Object rawValue) { + if (rawValue == null) { + return new AvgPair(); + } + if (rawValue instanceof byte[]) { + return deserializeAggregatedValue((byte[]) rawValue); + } else { + return processMultiValueArray(rawValue); + } + } + + @Override + public AvgPair applyRawValue(AvgPair value, Object rawValue) { + if (rawValue instanceof byte[]) { + value.apply(deserializeAggregatedValue((byte[]) rawValue)); + } else { + AvgPair mvResult = processMultiValueArray(rawValue); + value.apply(mvResult); + } + return value; + } + + @Override + public AvgPair applyAggregatedValue(AvgPair value, AvgPair aggregatedValue) { + value.apply(aggregatedValue); + return value; + } + + @Override + public AvgPair cloneAggregatedValue(AvgPair value) { + return new AvgPair(value.getSum(), value.getCount()); + } + + @Override + public boolean isAggregatedValueFixedSize() { + return true; + } + + @Override + public int getMaxAggregatedValueByteSize() { + return Double.BYTES + Long.BYTES; + } + + @Override + public byte[] serializeAggregatedValue(AvgPair value) { + return CustomSerDeUtils.AVG_PAIR_SER_DE.serialize(value); + } + + @Override + public AvgPair deserializeAggregatedValue(byte[] bytes) { + return CustomSerDeUtils.AVG_PAIR_SER_DE.deserialize(bytes); + } + + /** + * Processes a multi-value array and returns an AvgPair with the sum and count. + * The rawValue can be an Object[] array containing numeric values. 
+ */ + private AvgPair processMultiValueArray(Object rawValue) { + if (rawValue instanceof Object[]) { Review Comment: Hm, I meant a primitive array like `double[]`, which is also an `Object`. I vaguely recall some MV paths using primitive arrays instead of arrays of the boxed type. That doesn't look like the case for the star-tree index though, so this should be fine for now ([link1](https://github.com/apache/pinot/blob/9265b964855a9abc10f19de05147ef191ae147d1/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentColumnReader.java#L93-L121), [link2](https://github.com/apache/pinot/blob/9265b964855a9abc10f19de05147ef191ae147d1/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/BaseSingleTreeBuilder.java#L247)). ########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AvgMVAggregationFunction.java: ########## @@ -43,8 +44,25 @@ public AggregationFunctionType getType() { public void aggregate(int length, AggregationResultHolder aggregationResultHolder, Map<ExpressionContext, BlockValSet> blockValSetMap) { BlockValSet blockValSet = blockValSetMap.get(_expression); - double[][] valuesArray = blockValSet.getDoubleValuesMV(); + if (blockValSet.isSingleValue()) { + // StarTree pre-aggregated values: During StarTree creation, the multi-value column is pre-aggregated per StarTree + // node, resulting in a single value per node. + byte[][] bytesValues = blockValSet.getBytesValuesSV(); + AvgPair avgPair = new AvgPair(); + forEachNotNull(length, blockValSet, (from, to) -> { Review Comment: Yeah, that's a mistake as well (mine, as per Git blame :)). It's not really a big deal either way, but let's fix both of these if you don't mind! 
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/AvgMVValueAggregator.java: ########## @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.aggregator; + +import javax.annotation.Nullable; +import org.apache.pinot.segment.local.customobject.AvgPair; +import org.apache.pinot.segment.local.utils.CustomSerDeUtils; +import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.spi.data.FieldSpec.DataType; + + +/** + * Value aggregator for AVGMV aggregation function. + * This aggregator handles multi-value columns by computing the average across all values in all arrays. 
+ */ +public class AvgMVValueAggregator implements ValueAggregator<Object, AvgPair> { + public static final DataType AGGREGATED_VALUE_TYPE = DataType.BYTES; + + @Override + public AggregationFunctionType getAggregationType() { + return AggregationFunctionType.AVGMV; + } + + @Override + public DataType getAggregatedValueType() { + return AGGREGATED_VALUE_TYPE; + } + + @Override + public AvgPair getInitialAggregatedValue(@Nullable Object rawValue) { + if (rawValue == null) { + return new AvgPair(); + } + if (rawValue instanceof byte[]) { + return deserializeAggregatedValue((byte[]) rawValue); + } else { + return processMultiValueArray(rawValue); + } + } + + @Override + public AvgPair applyRawValue(AvgPair value, Object rawValue) { + if (rawValue instanceof byte[]) { + value.apply(deserializeAggregatedValue((byte[]) rawValue)); + } else { + AvgPair mvResult = processMultiValueArray(rawValue); + value.apply(mvResult); + } + return value; + } + + @Override + public AvgPair applyAggregatedValue(AvgPair value, AvgPair aggregatedValue) { + value.apply(aggregatedValue); + return value; + } + + @Override + public AvgPair cloneAggregatedValue(AvgPair value) { + return new AvgPair(value.getSum(), value.getCount()); + } + + @Override + public boolean isAggregatedValueFixedSize() { + return true; + } Review Comment: Thanks for verifying! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
