This is an automated email from the ASF dual-hosted git repository. richardstartin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
     new 65dcfe7  Allocation free `DataBlockCache` lookups (#8140)
65dcfe7 is described below

commit 65dcfe785e0e65779e12a1e730365b99bc825428
Author: Richard Startin <rich...@startree.ai>
AuthorDate: Tue Feb 8 09:04:46 2022 +0000

    Allocation free `DataBlockCache` lookups (#8140)

    * intern DataBlockCache lookup keys

    * comments
---
 .../apache/pinot/core/common/DataBlockCache.java   | 120 +++++++++------------
 1 file changed, 48 insertions(+), 72 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/common/DataBlockCache.java b/pinot-core/src/main/java/org/apache/pinot/core/common/DataBlockCache.java
index 24e71b1..254a43b 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/common/DataBlockCache.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/common/DataBlockCache.java
@@ -18,15 +18,14 @@
  */
 package org.apache.pinot.core.common;
 
+import java.util.EnumMap;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
-import javax.annotation.Nonnull;
 import org.apache.pinot.core.plan.DocIdSetPlanNode;
 import org.apache.pinot.segment.spi.evaluator.TransformEvaluator;
 import org.apache.pinot.spi.data.FieldSpec;
-import org.apache.pinot.spi.utils.EqualityUtils;
 
 
 /**
@@ -40,12 +39,12 @@ public class DataBlockCache {
 
   // Mark whether data have been fetched, need to be cleared in initNewBlock()
   private final Set<String> _columnDictIdLoaded = new HashSet<>();
-  private final Set<ColumnTypePair> _columnValueLoaded = new HashSet<>();
+  private final Map<FieldSpec.DataType, Set<String>> _columnValueLoaded = new EnumMap<>(FieldSpec.DataType.class);
   private final Set<String> _columnNumValuesLoaded = new HashSet<>();
 
   // Buffer for data
   private final Map<String, Object> _dictIdsMap = new HashMap<>();
-  private final Map<ColumnTypePair, Object> _valuesMap = new HashMap<>();
+  private final Map<FieldSpec.DataType, Map<String, Object>> _valuesMap = new HashMap<>();
   private final Map<String, int[]> _numValuesMap = new HashMap<>();
 
   private int[] _docIds;
@@ -65,9 +64,10 @@ public class DataBlockCache {
   public void initNewBlock(int[] docIds, int length) {
     _docIds = docIds;
     _length = length;
-
     _columnDictIdLoaded.clear();
-    _columnValueLoaded.clear();
+    for (Set<String> columns : _columnValueLoaded.values()) {
+      columns.clear();
+    }
     _columnNumValuesLoaded.clear();
   }
 
@@ -109,12 +109,11 @@ public class DataBlockCache {
    * @return Array of int values
    */
   public int[] getIntValuesForSVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.INT);
-    int[] intValues = (int[]) _valuesMap.get(key);
-    if (_columnValueLoaded.add(key)) {
+    int[] intValues = getValues(FieldSpec.DataType.INT, column);
+    if (markLoaded(FieldSpec.DataType.INT, column)) {
       if (intValues == null) {
         intValues = new int[DocIdSetPlanNode.MAX_DOC_PER_CALL];
-        _valuesMap.put(key, intValues);
+        putValues(FieldSpec.DataType.INT, column, intValues);
       }
       _dataFetcher.fetchIntValues(column, _docIds, _length, intValues);
     }
@@ -139,12 +138,11 @@ public class DataBlockCache {
    * @return Array of long values
    */
   public long[] getLongValuesForSVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.LONG);
-    long[] longValues = (long[]) _valuesMap.get(key);
-    if (_columnValueLoaded.add(key)) {
+    long[] longValues = getValues(FieldSpec.DataType.LONG, column);
+    if (markLoaded(FieldSpec.DataType.LONG, column)) {
       if (longValues == null) {
         longValues = new long[DocIdSetPlanNode.MAX_DOC_PER_CALL];
-        _valuesMap.put(key, longValues);
+        putValues(FieldSpec.DataType.LONG, column, longValues);
       }
       _dataFetcher.fetchLongValues(column, _docIds, _length, longValues);
     }
@@ -169,12 +167,11 @@ public class DataBlockCache {
    * @return Array of float values
    */
   public float[] getFloatValuesForSVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.FLOAT);
-    float[] floatValues = (float[]) _valuesMap.get(key);
-    if (_columnValueLoaded.add(key)) {
+    float[] floatValues = getValues(FieldSpec.DataType.FLOAT, column);
+    if (markLoaded(FieldSpec.DataType.FLOAT, column)) {
       if (floatValues == null) {
         floatValues = new float[DocIdSetPlanNode.MAX_DOC_PER_CALL];
-        _valuesMap.put(key, floatValues);
+        putValues(FieldSpec.DataType.FLOAT, column, floatValues);
       }
       _dataFetcher.fetchFloatValues(column, _docIds, _length, floatValues);
     }
@@ -199,12 +196,11 @@ public class DataBlockCache {
    * @return Array of double values
    */
   public double[] getDoubleValuesForSVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.DOUBLE);
-    double[] doubleValues = (double[]) _valuesMap.get(key);
-    if (_columnValueLoaded.add(key)) {
+    double[] doubleValues = getValues(FieldSpec.DataType.DOUBLE, column);
+    if (markLoaded(FieldSpec.DataType.DOUBLE, column)) {
       if (doubleValues == null) {
         doubleValues = new double[DocIdSetPlanNode.MAX_DOC_PER_CALL];
-        _valuesMap.put(key, doubleValues);
+        putValues(FieldSpec.DataType.DOUBLE, column, doubleValues);
       }
       _dataFetcher.fetchDoubleValues(column, _docIds, _length, doubleValues);
     }
@@ -229,12 +225,11 @@ public class DataBlockCache {
    * @return Array of string values
    */
   public String[] getStringValuesForSVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.STRING);
-    String[] stringValues = (String[]) _valuesMap.get(key);
-    if (_columnValueLoaded.add(key)) {
+    String[] stringValues = getValues(FieldSpec.DataType.STRING, column);
+    if (markLoaded(FieldSpec.DataType.STRING, column)) {
       if (stringValues == null) {
         stringValues = new String[DocIdSetPlanNode.MAX_DOC_PER_CALL];
-        _valuesMap.put(key, stringValues);
+        putValues(FieldSpec.DataType.STRING, column, stringValues);
       }
       _dataFetcher.fetchStringValues(column, _docIds, _length, stringValues);
     }
@@ -259,13 +254,11 @@ public class DataBlockCache {
    * @return byte[] for the column
    */
   public byte[][] getBytesValuesForSVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.BYTES);
-    byte[][] bytesValues = (byte[][]) _valuesMap.get(key);
-
-    if (_columnValueLoaded.add(key)) {
+    byte[][] bytesValues = getValues(FieldSpec.DataType.BYTES, column);
+    if (markLoaded(FieldSpec.DataType.BYTES, column)) {
       if (bytesValues == null) {
         bytesValues = new byte[DocIdSetPlanNode.MAX_DOC_PER_CALL][];
-        _valuesMap.put(key, bytesValues);
+        putValues(FieldSpec.DataType.BYTES, column, bytesValues);
       }
       _dataFetcher.fetchBytesValues(column, _docIds, _length, bytesValues);
     }
@@ -301,12 +294,11 @@ public class DataBlockCache {
    * @return Array of int values
    */
   public int[][] getIntValuesForMVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.INT);
-    int[][] intValues = (int[][]) _valuesMap.get(key);
-    if (_columnValueLoaded.add(key)) {
+    int[][] intValues = getValues(FieldSpec.DataType.INT, column);
+    if (markLoaded(FieldSpec.DataType.INT, column)) {
       if (intValues == null) {
         intValues = new int[DocIdSetPlanNode.MAX_DOC_PER_CALL][];
-        _valuesMap.put(key, intValues);
+        putValues(FieldSpec.DataType.INT, column, intValues);
       }
       _dataFetcher.fetchIntValues(column, _docIds, _length, intValues);
     }
@@ -331,12 +323,11 @@ public class DataBlockCache {
    * @return Array of long values
    */
   public long[][] getLongValuesForMVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.LONG);
-    long[][] longValues = (long[][]) _valuesMap.get(key);
-    if (_columnValueLoaded.add(key)) {
+    long[][] longValues = getValues(FieldSpec.DataType.LONG, column);
+    if (markLoaded(FieldSpec.DataType.LONG, column)) {
       if (longValues == null) {
         longValues = new long[DocIdSetPlanNode.MAX_DOC_PER_CALL][];
-        _valuesMap.put(key, longValues);
+        putValues(FieldSpec.DataType.LONG, column, longValues);
       }
       _dataFetcher.fetchLongValues(column, _docIds, _length, longValues);
     }
@@ -361,12 +352,11 @@ public class DataBlockCache {
    * @return Array of float values
    */
   public float[][] getFloatValuesForMVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.FLOAT);
-    float[][] floatValues = (float[][]) _valuesMap.get(key);
-    if (_columnValueLoaded.add(key)) {
+    float[][] floatValues = getValues(FieldSpec.DataType.FLOAT, column);
+    if (markLoaded(FieldSpec.DataType.FLOAT, column)) {
       if (floatValues == null) {
         floatValues = new float[DocIdSetPlanNode.MAX_DOC_PER_CALL][];
-        _valuesMap.put(key, floatValues);
+        putValues(FieldSpec.DataType.FLOAT, column, floatValues);
       }
       _dataFetcher.fetchFloatValues(column, _docIds, _length, floatValues);
     }
@@ -391,12 +381,11 @@ public class DataBlockCache {
    * @return Array of double values
    */
   public double[][] getDoubleValuesForMVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.DOUBLE);
-    double[][] doubleValues = (double[][]) _valuesMap.get(key);
-    if (_columnValueLoaded.add(key)) {
+    double[][] doubleValues = getValues(FieldSpec.DataType.DOUBLE, column);
+    if (markLoaded(FieldSpec.DataType.DOUBLE, column)) {
       if (doubleValues == null) {
         doubleValues = new double[DocIdSetPlanNode.MAX_DOC_PER_CALL][];
-        _valuesMap.put(key, doubleValues);
+        putValues(FieldSpec.DataType.DOUBLE, column, doubleValues);
       }
       _dataFetcher.fetchDoubleValues(column, _docIds, _length, doubleValues);
     }
@@ -421,12 +410,11 @@ public class DataBlockCache {
    * @return Array of string values
    */
   public String[][] getStringValuesForMVColumn(String column) {
-    ColumnTypePair key = new ColumnTypePair(column, FieldSpec.DataType.STRING);
-    String[][] stringValues = (String[][]) _valuesMap.get(key);
-    if (_columnValueLoaded.add(key)) {
+    String[][] stringValues = getValues(FieldSpec.DataType.STRING, column);
+    if (markLoaded(FieldSpec.DataType.STRING, column)) {
       if (stringValues == null) {
         stringValues = new String[DocIdSetPlanNode.MAX_DOC_PER_CALL][];
-        _valuesMap.put(key, stringValues);
+        putValues(FieldSpec.DataType.STRING, column, stringValues);
       }
       _dataFetcher.fetchStringValues(column, _docIds, _length, stringValues);
     }
@@ -462,28 +450,16 @@ public class DataBlockCache {
     return numValues;
   }
 
-  /**
-   * Helper class to store pair of column name and data type.
-   */
-  private static class ColumnTypePair {
-    final String _column;
-    final FieldSpec.DataType _dataType;
-
-    ColumnTypePair(@Nonnull String column, @Nonnull FieldSpec.DataType dataType) {
-      _column = column;
-      _dataType = dataType;
-    }
+  private boolean markLoaded(FieldSpec.DataType dataType, String column) {
+    return _columnValueLoaded.computeIfAbsent(dataType, k -> new HashSet<>()).add(column);
+  }
 
-    @Override
-    public int hashCode() {
-      return EqualityUtils.hashCodeOf(_column.hashCode(), _dataType.hashCode());
-    }
+  @SuppressWarnings("unchecked")
+  private <T> T getValues(FieldSpec.DataType dataType, String column) {
+    return (T) _valuesMap.computeIfAbsent(dataType, k -> new HashMap<>()).get(column);
+  }
 
-    @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
-    @Override
-    public boolean equals(Object obj) {
-      ColumnTypePair that = (ColumnTypePair) obj;
-      return _column.equals(that._column) && _dataType == that._dataType;
-    }
+  private void putValues(FieldSpec.DataType dataType, String column, Object values) {
+    _valuesMap.get(dataType).put(column, values);
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org