Jackie-Jiang commented on code in PR #15591: URL: https://github.com/apache/pinot/pull/15591#discussion_r2080514969
########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/IndexService.java: ########## @@ -58,6 +59,7 @@ public class IndexService { private final List<IndexType<?, ?, ?>> _allIndexes; private final Map<String, IndexType<?, ?, ?>> _allIndexesById; + private final Object2ShortOpenHashMap _allIndexPosById; Review Comment: (minor) ```suggestion private final Object2ShortOpenHashMap<String> _allIndexPosById; ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java: ########## @@ -51,36 +53,115 @@ public PhysicalColumnIndexContainer(SegmentDirectory.Reader segmentReader, Colum fieldIndexConfigs = FieldIndexConfigs.EMPTY; } - _readersByIndex = new HashMap<>(); - for (IndexType<?, ?, ?> indexType : IndexService.getInstance().getAllIndexes()) { - if (segmentReader.hasIndexFor(columnName, indexType)) { - IndexReaderFactory<?> readerProvider = indexType.getReaderFactory(); - try { - IndexReader reader = readerProvider.createIndexReader(segmentReader, fieldIndexConfigs, metadata); - if (reader != null) { - _readersByIndex.put(indexType, reader); + ArrayList<IndexType> indexTypes = new ArrayList<>(); + ArrayList<IndexReader> readers = new ArrayList<>(); + + try { + for (IndexType<?, ?, ?> indexType : IndexService.getInstance().getAllIndexes()) { + if (segmentReader.hasIndexFor(columnName, indexType)) { + IndexReaderFactory<?> readerProvider = indexType.getReaderFactory(); + try { + IndexReader reader = readerProvider.createIndexReader(segmentReader, fieldIndexConfigs, metadata); + if (reader != null) { + indexTypes.add(indexType); + readers.add(reader); + } + } catch (IndexReaderConstraintException ex) { + LOGGER.warn("Constraint violation when indexing {} with {} index", columnName, indexType, ex); } - } catch (IndexReaderConstraintException ex) { - LOGGER.warn("Constraint violation when indexing {} with {} index", columnName, indexType, ex); } } + } catch (Throwable t) { + for (IndexReader reader : readers) { + try { + reader.close(); + } catch (Throwable ct) { + LOGGER.warn("Can't close reader on init error, column: " + columnName + " reader: " + reader.getClass(), ct); + } + } + throw t; } + + _indexTypeMap = IndexTypeMap.get(indexTypes, readers); } + @SuppressWarnings("unchecked") @Nullable @Override public <I extends IndexReader, T extends IndexType<?, I, ?>> I getIndex(T indexType) { - @SuppressWarnings("unchecked") - I reader = (I) _readersByIndex.get(indexType); - return reader; + return _indexTypeMap.getIndex(indexType); } @Override public void close() throws IOException { // TODO (index-spi): Verify that readers can be closed in any order - for (IndexReader index : _readersByIndex.values()) { - index.close(); + _indexTypeMap.close(); + } + + static class IndexTypeMap implements Closeable { + private static final IndexReader[] EMPTY_READERS = new IndexReader[0]; + + public static final IndexTypeMap EMPTY = new IndexTypeMap((short) 0, EMPTY_READERS); + + private final short _shift; + //stores index readers ordered by index id, shifted by _shift to conserve memory + private final IndexReader[] _readers; + + private IndexTypeMap(short shift, IndexReader[] readers) { + _shift = shift; + _readers = readers; + } + + static IndexTypeMap get(List<IndexType> indexTypes, List<IndexReader> readers) { + if (indexTypes.isEmpty()) { + return EMPTY; + } + + short min = Short.MAX_VALUE; + int max = -1; + + ShortArrayList indexIds = new ShortArrayList(indexTypes.size()); + IndexService indexService = IndexService.getInstance(); + + for (int i = 0, n = indexTypes.size(); i < n; i++) { Review Comment: (minor) ```suggestion for (IndexType indexType : indexTypes) { ``` ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java: ########## @@ -90,7 +90,23 @@ default boolean isMinMaxValueInvalid() { @Nullable Set<Integer> getPartitions(); - Map<IndexType<?, ?, ?>, Long> getIndexSizeMap(); + long getIndexSizeFor(IndexType type); + + default void addIndexSize(short indexType, long size) { + // do nothing + } + + default short getIndexType(int i) { + return (short) -1; + } + + default long getIndexSize(int i) { + return -1; + } + + default int getIndexTypeSizesCount() { Review Comment: Consider renaming ```suggestion default int getNumIndexes { ``` ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java: ########## @@ -90,7 +90,23 @@ default boolean isMinMaxValueInvalid() { @Nullable Set<Integer> getPartitions(); - Map<IndexType<?, ?, ?>, Long> getIndexSizeMap(); + long getIndexSizeFor(IndexType type); Review Comment: Consider adding a default impl for it ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java: ########## @@ -90,7 +90,23 @@ default boolean isMinMaxValueInvalid() { @Nullable Set<Integer> getPartitions(); - Map<IndexType<?, ?, ?>, Long> getIndexSizeMap(); + long getIndexSizeFor(IndexType type); + + default void addIndexSize(short indexType, long size) { Review Comment: The methods in this interface should be read-only ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java: ########## @@ -164,10 +176,58 @@ public Set<Integer> getPartitions() { return _partitions; } - @Nullable @Override - public Map<IndexType<?, ?, ?>, Long> getIndexSizeMap() { - return _indexSizeMap; + public long getIndexSizeFor(IndexType type) { + short indexId = IndexService.getInstance().getNumericId(type); + for (int i = 0, n = getIndexTypeSizesCount(); i < n; i++) { + if (indexId == getIndexType(i)) { + return getIndexSize(i); + } + } + + return INDEX_NOT_FOUND; + } + + // size should be non-negative 48-bit value + public void addIndexSize(short indexType, long size) { + if (size < 0 || size > SIZE_MASK) { + throw new IllegalArgumentException( + "Index size should be a non-negative integer value between 0 and " + SIZE_MASK); + } + long typeAndSize = ((long) indexType) << 48 | (size & SIZE_MASK); + _indexTypeSizeList.add(typeAndSize); + } + + @Override + public short getIndexType(int i) { + return (short) ((_indexTypeSizeList.getLong(i) >> 48) & 0xffff); Review Comment: (minor) ```suggestion return (short) (_indexTypeSizeList.getLong(i) >>> 48); ``` ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java: ########## @@ -36,6 +35,7 @@ @InterfaceAudience.Private @SuppressWarnings("rawtypes") public interface ColumnMetadata { + int INDEX_NOT_FOUND = -1; Review Comment: What is the contract around this constant? Seems it is used as the index size when index doesn't exist? ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java: ########## @@ -355,7 +415,9 @@ public static class Builder { private PartitionFunction _partitionFunction; private Set<Integer> _partitions; private boolean _autoGenerated; - private Map<IndexType<?, ?, ?>, Long> _indexSizeMap = new HashMap<>(); + + // use non-default size to save space for most columns + LongArrayList _indexSizeList = new LongArrayList(2); Review Comment: This should be updated in the builder ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java: ########## @@ -90,7 +90,23 @@ default boolean isMinMaxValueInvalid() { @Nullable Set<Integer> getPartitions(); - Map<IndexType<?, ?, ?>, Long> getIndexSizeMap(); + long getIndexSizeFor(IndexType type); + + default void addIndexSize(short indexType, long size) { + // do nothing + } + + default short getIndexType(int i) { + return (short) -1; + } + + default long getIndexSize(int i) { + return -1; + } + + default int getIndexTypeSizesCount() { + return 0; + } Review Comment: Please add javadoc for each of them as they are not really self-explained ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/ColumnMetadata.java: ########## @@ -90,7 +90,23 @@ default boolean isMinMaxValueInvalid() { @Nullable Set<Integer> getPartitions(); - Map<IndexType<?, ?, ?>, Long> getIndexSizeMap(); + long getIndexSizeFor(IndexType type); Review Comment: This seems never used in production code ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/IndexService.java: ########## @@ -72,6 +74,12 @@ private IndexService(Set<IndexPlugin<?>> allPlugins) { Collections.sort(allIndexIds); _allIndexes = new ArrayList<>(); allIndexIds.forEach(id -> _allIndexes.add(_allIndexesById.get(id))); + + _allIndexPosById = new Object2ShortOpenHashMap(allIndexIds.size()); + _allIndexPosById.defaultReturnValue((short) -1); Review Comment: Consider making this `-1` a constant for readability ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java: ########## @@ -164,10 +176,58 @@ public Set<Integer> getPartitions() { return _partitions; } - @Nullable @Override - public Map<IndexType<?, ?, ?>, Long> getIndexSizeMap() { - return _indexSizeMap; + public long getIndexSizeFor(IndexType type) { + short indexId = IndexService.getInstance().getNumericId(type); + for (int i = 0, n = getIndexTypeSizesCount(); i < n; i++) { + if (indexId == getIndexType(i)) { + return getIndexSize(i); + } + } + + return INDEX_NOT_FOUND; + } + + // size should be non-negative 48-bit value + public void addIndexSize(short indexType, long size) { + if (size < 0 || size > SIZE_MASK) { + throw new IllegalArgumentException( + "Index size should be a non-negative integer value between 0 and " + SIZE_MASK); + } + long typeAndSize = ((long) indexType) << 48 | (size & SIZE_MASK); + _indexTypeSizeList.add(typeAndSize); + } + + @Override + public short getIndexType(int i) { + return (short) ((_indexTypeSizeList.getLong(i) >> 48) & 0xffff); + } + + @Override + public long getIndexSize(int i) { + return (_indexTypeSizeList.getLong(i) & SIZE_MASK); + } + + @JsonIgnore + @Override + public int getIndexTypeSizesCount() { + return _indexTypeSizeList.size(); + } + + // This method is only meant to retain compatibility of deserialization for + // /tables/{tableName}/segments/{segmentName}/metadata endpoint . + @SuppressWarnings("unused") + public JsonNode getIndexSizeMap() { Review Comment: Are we ever serialize/deserialize column metadata as json? It is not designed to do so -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org