mayankshriv commented on a change in pull request #5872: URL: https://github.com/apache/incubator-pinot/pull/5872#discussion_r471798153
########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/query/DictionaryBasedAggregationOperator.java ########## @@ -77,36 +83,42 @@ protected IntermediateResultsBlock getNextBlock() { .add(new MinMaxRangePair(dictionary.getDoubleValue(0), dictionary.getDoubleValue(dictionarySize - 1))); break; case DISTINCTCOUNT: - IntOpenHashSet set = new IntOpenHashSet(dictionarySize); + AbstractCollection set; switch (dictionary.getValueType()) { case INT: + set = new IntOpenHashSet(dictionarySize); for (int dictId = 0; dictId < dictionarySize; dictId++) { set.add(dictionary.getIntValue(dictId)); } break; case LONG: + set = new LongOpenHashSet(dictionarySize); for (int dictId = 0; dictId < dictionarySize; dictId++) { - set.add(Long.hashCode(dictionary.getLongValue(dictId))); + set.add(dictionary.getLongValue(dictId)); } break; case FLOAT: + set = new FloatOpenHashSet(dictionarySize); for (int dictId = 0; dictId < dictionarySize; dictId++) { - set.add(Float.hashCode(dictionary.getFloatValue(dictId))); + set.add(dictionary.getFloatValue(dictId)); } break; case DOUBLE: + set = new DoubleOpenHashSet(dictionarySize); for (int dictId = 0; dictId < dictionarySize; dictId++) { - set.add(Double.hashCode(dictionary.getDoubleValue(dictId))); + set.add(dictionary.getDoubleValue(dictId)); } break; case STRING: + set = new ObjectOpenHashSet<ByteBuffer>(dictionarySize); Review comment: why not byte[]? ########## File path: pinot-core/src/main/java/org/apache/pinot/core/common/ObjectSerDeUtils.java ########## @@ -452,6 +476,139 @@ public IntSet deserialize(ByteBuffer byteBuffer) { } }; + public static final ObjectSerDe<LongSet> LONG_SET_SER_DE = new ObjectSerDe<LongSet>() { + + @Override + public byte[] serialize(LongSet longSet) { + int size = longSet.size(); + byte[] bytes = new byte[Integer.BYTES + size * Long.BYTES]; + ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); + byteBuffer.putInt(size); + LongIterator iterator = longSet.iterator(); + while (iterator.hasNext()) { + byteBuffer.putLong(iterator.nextLong()); + } + return bytes; + } + + @Override + public LongSet deserialize(byte[] bytes) { + return deserialize(ByteBuffer.wrap(bytes)); + } + + @Override + public LongSet deserialize(ByteBuffer byteBuffer) { + int size = byteBuffer.getInt(); + LongSet longSet = new LongOpenHashSet(size); + for (int i = 0; i < size; i++) { + longSet.add(byteBuffer.getLong()); + } + return longSet; + } + }; + + public static final ObjectSerDe<FloatSet> FLOAT_SET_SER_DE = new ObjectSerDe<FloatSet>() { + + @Override + public byte[] serialize(FloatSet floatSet) { + int size = floatSet.size(); + byte[] bytes = new byte[Integer.BYTES + size * Float.BYTES]; + ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); + byteBuffer.putInt(size); Review comment: Wondering if we should have a single ser/de for different data types, by writing the data type as part of header. Not sure if the iterators share the same interface to be able to share the same `serialize()`. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org