kishoreg commented on a change in pull request #5786:
URL: https://github.com/apache/incubator-pinot/pull/5786#discussion_r464110519



##########
File path: 
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PartitionedDistinctCountAggregationFunction.java
##########
@@ -0,0 +1,425 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet;
+import it.unimi.dsi.fastutil.floats.FloatOpenHashSet;
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
+import java.util.Collection;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.pinot.common.function.AggregationFunctionType;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import 
org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder;
+import org.apache.pinot.core.query.request.context.ExpressionContext;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.utils.ByteArray;
+import org.roaringbitmap.RoaringBitmap;
+
+
+/**
+ * The {@code PartitionedDistinctCountAggregationFunction} calculates the 
number of distinct values for a given
+ * single-value expression.
+ * <p>IMPORTANT: This function relies on the expression values being 
partitioned for each segment, where there are no
+ * common values across different segments.
+ * <p>This function calculates the exact number of distinct values within the 
segment, then simply sums up the results
+ * from different segments to get the final result.
+ */
+public class PartitionedDistinctCountAggregationFunction extends 
BaseSingleInputAggregationFunction<Long, Long> {
+
+  public PartitionedDistinctCountAggregationFunction(ExpressionContext 
expression) {
+    super(expression);
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return AggregationFunctionType.PARTITIONEDDISTINCTCOUNT;
+  }
+
+  @Override
+  public void accept(AggregationFunctionVisitorBase visitor) {
+    visitor.visit(this);
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return new ObjectAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, 
int maxCapacity) {
+    return new ObjectGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder 
aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+
+    // For dictionary-encoded expression, store dictionary ids into a 
RoaringBitmap
+    if (blockValSet.getDictionary() != null) {
+      int[] dictIds = blockValSet.getDictionaryIdsSV();
+      RoaringBitmap bitmap = aggregationResultHolder.getResult();
+      if (bitmap == null) {
+        bitmap = new RoaringBitmap();
+        aggregationResultHolder.setValue(bitmap);
+      }
+      bitmap.addN(dictIds, 0, length);
+      return;
+    }
+
+    // For non-dictionary-encoded expression, store INT values into a 
RoaringBitmap, other types into an OpenHashSet
+    DataType valueType = blockValSet.getValueType();
+    switch (valueType) {
+      case INT:
+        int[] intValues = blockValSet.getIntValuesSV();
+        RoaringBitmap bitmap = aggregationResultHolder.getResult();
+        if (bitmap == null) {
+          bitmap = new RoaringBitmap();
+          aggregationResultHolder.setValue(bitmap);
+        }
+        bitmap.addN(intValues, 0, length);
+        break;
+      case LONG:
+        long[] longValues = blockValSet.getLongValuesSV();
+        LongOpenHashSet longSet = aggregationResultHolder.getResult();

Review comment:
       I thought RoaringBitmap works for long values as well?

##########
File path: 
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java
##########
@@ -123,6 +123,8 @@ public static AggregationFunction 
getAggregationFunction(FunctionContext functio
             return new DistinctCountAggregationFunction(firstArgument);
           case DISTINCTCOUNTBITMAP:
             return new DistinctCountBitmapAggregationFunction(firstArgument);
+          case PARTITIONEDDISTINCTCOUNT:

Review comment:
       We need to start using `_` separators in function names. I think the parser 
now handles both forms (with and without `_`) automatically.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to