Jackie-Jiang commented on a change in pull request #6559: URL: https://github.com/apache/incubator-pinot/pull/6559#discussion_r573384919
########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java ########## @@ -831,6 +777,156 @@ private String getGroupKey(IntArray rawKey) { return groupKeyBuilder.toString(); } + /** + * Fast int-to-int hashmap with {@link #INVALID_ID} as the default return value. + * <p>Different from {@link it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap}, this map uses one single array to store + * keys and values to reduce the cache miss. + */ + @VisibleForTesting + public static class IntGroupIdMap { + private static final float LOAD_FACTOR = 0.75f; + + private int[] _keyValueHolder; + private int _capacity; + private int _mask; + private int _maxNumEntries; + private int _size; + + public IntGroupIdMap() { + _capacity = 1 << 10; + int holderSize = _capacity << 1; + _keyValueHolder = new int[holderSize]; + _mask = holderSize - 1; + _maxNumEntries = (int) (_capacity * LOAD_FACTOR); + } + + public int size() { + return _size; + } + + /** + * Returns the group id for the given raw key. Create a new group id if the raw key does not exist and the group id + * upper bound is not reached. + */ + public int getGroupId(int rawKey, int groupIdUpperBound) { + // NOTE: Key 0 is reserved as the null key. Use (rawKey + 1) as the internal key because rawKey can never be -1. + int internalKey = rawKey + 1; + int index = (HashCommon.mix(internalKey) << 1) & _mask; + int key = _keyValueHolder[index]; + + // Handle hash hit separately for better performance + if (key == internalKey) { + return _keyValueHolder[index + 1]; + } + if (key == 0) { + return _size < groupIdUpperBound ? addNewGroup(internalKey, index) : INVALID_ID; + } + + // Hash collision + while (true) { + index = (index + 2) & _mask; + key = _keyValueHolder[index]; + if (key == internalKey) { + return _keyValueHolder[index + 1]; + } + if (key == 0) { + return _size < groupIdUpperBound ? addNewGroup(internalKey, index) : INVALID_ID; + } + } + } + + private int addNewGroup(int internalKey, int index) { + int groupId = _size++; + _keyValueHolder[index] = internalKey; + _keyValueHolder[index + 1] = groupId; + if (_size > _maxNumEntries) { + expand(); + } + return groupId; + } + + private void expand() { + _capacity <<= 1; + int holderSize = _capacity << 1; + int[] oldKeyValueHolder = _keyValueHolder; + _keyValueHolder = new int[holderSize]; + _mask = holderSize - 1; + _maxNumEntries <<= 1; + int oldIndex = 0; + for (int i = 0; i < _size; i++) { + while (oldKeyValueHolder[oldIndex] == 0) { + oldIndex += 2; + } + int key = oldKeyValueHolder[oldIndex]; + int value = oldKeyValueHolder[oldIndex + 1]; + int newIndex = (HashCommon.mix(key) << 1) & _mask; + if (_keyValueHolder[newIndex] != 0) { + do { + newIndex = (newIndex + 2) & _mask; + } while (_keyValueHolder[newIndex] != 0); + } + _keyValueHolder[newIndex] = key; + _keyValueHolder[newIndex + 1] = value; + oldIndex += 2; + } + } + + public Iterator<Entry> iterator() { + return new Iterator<Entry>() { + private final Entry _entry = new Entry(); + private int _index; + private int _numRemainingEntries = _size; + + @Override + public boolean hasNext() { + return _numRemainingEntries > 0; + } + + @Override + public Entry next() { + int key; + while ((key = _keyValueHolder[_index]) == 0) { + _index += 2; + } + _entry._rawKey = key - 1; + _entry._groupId = _keyValueHolder[_index + 1]; + _index += 2; + _numRemainingEntries--; + return _entry; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * Clears the map and trims the map if the size is larger than the {@link #MAX_CACHING_MAP_SIZE}. + */ + public void clear() { + if (_size == 0) { + return; + } + if (_size <= MAX_CACHING_MAP_SIZE) { + Arrays.fill(_keyValueHolder, 0); + } else { + _capacity = 1 << 10; Review comment: Good point, added ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org