Jackie-Jiang commented on code in PR #10891: URL: https://github.com/apache/pinot/pull/10891#discussion_r1248658614
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java: ########## @@ -119,65 +126,257 @@ private List<String> getColumnsToAddMinMaxValue() { private boolean needAddColumnMinMaxValueForColumn(String columnName) { ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(columnName); - return columnMetadata.hasDictionary() && columnMetadata.getMinValue() == null - && columnMetadata.getMaxValue() == null && !columnMetadata.isMinMaxValueInvalid(); + return columnMetadata.getMinValue() == null && columnMetadata.getMaxValue() == null + && !columnMetadata.isMinMaxValueInvalid(); } private void addColumnMinMaxValueForColumn(String columnName) throws Exception { - // Skip column without dictionary or with min/max value already set + // Skip column with min/max value already set ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(columnName); - if (!columnMetadata.hasDictionary() || columnMetadata.getMinValue() != null - || columnMetadata.getMaxValue() != null) { + if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue() != null) { return; } - PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary()); DataType dataType = columnMetadata.getDataType().getStoredType(); - int length = columnMetadata.getCardinality(); - switch (dataType) { - case INT: - try (IntDictionary intDictionary = new IntDictionary(dictionaryBuffer, length)) { - SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, - intDictionary.getStringValue(0), intDictionary.getStringValue(length - 1)); - } - break; - case LONG: - try (LongDictionary longDictionary = new LongDictionary(dictionaryBuffer, length)) { + if (columnMetadata.hasDictionary()) { + PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary()); + int length = columnMetadata.getCardinality(); + switch (dataType) { + case INT: + try (IntDictionary intDictionary = new IntDictionary(dictionaryBuffer, length)) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + intDictionary.getStringValue(0), intDictionary.getStringValue(length - 1)); + } + break; + case LONG: + try (LongDictionary longDictionary = new LongDictionary(dictionaryBuffer, length)) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + longDictionary.getStringValue(0), longDictionary.getStringValue(length - 1)); + } + break; + case FLOAT: + try (FloatDictionary floatDictionary = new FloatDictionary(dictionaryBuffer, length)) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + floatDictionary.getStringValue(0), floatDictionary.getStringValue(length - 1)); + } + break; + case DOUBLE: + try (DoubleDictionary doubleDictionary = new DoubleDictionary(dictionaryBuffer, length)) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + doubleDictionary.getStringValue(0), doubleDictionary.getStringValue(length - 1)); + } + break; + case STRING: + try (StringDictionary stringDictionary = new StringDictionary(dictionaryBuffer, length, + columnMetadata.getColumnMaxLength())) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + stringDictionary.getStringValue(0), stringDictionary.getStringValue(length - 1)); + } + break; + case BYTES: + try (BytesDictionary bytesDictionary = new BytesDictionary(dictionaryBuffer, length, + columnMetadata.getColumnMaxLength())) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + bytesDictionary.getStringValue(0), bytesDictionary.getStringValue(length - 1)); + } + break; + default: + throw new IllegalStateException("Unsupported data type: " + dataType + " for column: " + columnName); + } + } else { + // setting min/max for non-dictionary columns. + int numDocs = columnMetadata.getTotalDocs(); + boolean isSingleValueField = _segmentMetadata.getSchema().getFieldSpecFor(columnName).isSingleValueField(); + PinotDataBuffer forwardBuffer = _segmentWriter.getIndexFor(columnName, StandardIndexes.forward()); + switch (dataType) { + case INT: { + int min = Integer.MAX_VALUE; + int max = Integer.MIN_VALUE; + if (isSingleValueField) { + FixedByteChunkSVForwardIndexReader rawIndexReader = new FixedByteChunkSVForwardIndexReader(forwardBuffer, + DataType.INT); + ChunkReaderContext readerContext = rawIndexReader.createContext(); Review Comment: We want to close both the reader and the context. You may use the try-with-resource block ```suggestion try (FixedByteChunkSVForwardIndexReader rawIndexReader = new FixedByteChunkSVForwardIndexReader(forwardBuffer, DataType.INT); ChunkReaderContext readerContext = rawIndexReader.createContext() ) { ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java: ########## @@ -119,65 +126,257 @@ private List<String> getColumnsToAddMinMaxValue() { private boolean needAddColumnMinMaxValueForColumn(String columnName) { ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(columnName); - return columnMetadata.hasDictionary() && columnMetadata.getMinValue() == null - && columnMetadata.getMaxValue() == null && !columnMetadata.isMinMaxValueInvalid(); + return columnMetadata.getMinValue() == null && columnMetadata.getMaxValue() == null + && !columnMetadata.isMinMaxValueInvalid(); } private void addColumnMinMaxValueForColumn(String columnName) throws Exception { - // Skip column without dictionary or with min/max value already set + // Skip column with min/max value already set ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(columnName); - if (!columnMetadata.hasDictionary() || columnMetadata.getMinValue() != null - || columnMetadata.getMaxValue() != null) { + if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue() != null) { return; } - PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary()); DataType dataType = columnMetadata.getDataType().getStoredType(); - int length = columnMetadata.getCardinality(); - switch (dataType) { - case INT: - try (IntDictionary intDictionary = new IntDictionary(dictionaryBuffer, length)) { - SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, - intDictionary.getStringValue(0), intDictionary.getStringValue(length - 1)); - } - break; - case LONG: - try (LongDictionary longDictionary = new LongDictionary(dictionaryBuffer, length)) { + if (columnMetadata.hasDictionary()) { + PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary()); + int length = columnMetadata.getCardinality(); + switch (dataType) { + case INT: + try (IntDictionary intDictionary = new IntDictionary(dictionaryBuffer, length)) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + intDictionary.getStringValue(0), intDictionary.getStringValue(length - 1)); + } + break; + case LONG: + try (LongDictionary longDictionary = new LongDictionary(dictionaryBuffer, length)) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + longDictionary.getStringValue(0), longDictionary.getStringValue(length - 1)); + } + break; + case FLOAT: + try (FloatDictionary floatDictionary = new FloatDictionary(dictionaryBuffer, length)) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + floatDictionary.getStringValue(0), floatDictionary.getStringValue(length - 1)); + } + break; + case DOUBLE: + try (DoubleDictionary doubleDictionary = new DoubleDictionary(dictionaryBuffer, length)) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + doubleDictionary.getStringValue(0), doubleDictionary.getStringValue(length - 1)); + } + break; + case STRING: + try (StringDictionary stringDictionary = new StringDictionary(dictionaryBuffer, length, + columnMetadata.getColumnMaxLength())) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + stringDictionary.getStringValue(0), stringDictionary.getStringValue(length - 1)); + } + break; + case BYTES: + try (BytesDictionary bytesDictionary = new BytesDictionary(dictionaryBuffer, length, + columnMetadata.getColumnMaxLength())) { + SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, + bytesDictionary.getStringValue(0), bytesDictionary.getStringValue(length - 1)); + } + break; + default: + throw new IllegalStateException("Unsupported data type: " + dataType + " for column: " + columnName); + } + } else { + // setting min/max for non-dictionary columns. + int numDocs = columnMetadata.getTotalDocs(); + boolean isSingleValueField = _segmentMetadata.getSchema().getFieldSpecFor(columnName).isSingleValueField(); + PinotDataBuffer forwardBuffer = _segmentWriter.getIndexFor(columnName, StandardIndexes.forward()); + switch (dataType) { + case INT: { + int min = Integer.MAX_VALUE; + int max = Integer.MIN_VALUE; + if (isSingleValueField) { + FixedByteChunkSVForwardIndexReader rawIndexReader = new FixedByteChunkSVForwardIndexReader(forwardBuffer, + DataType.INT); + ChunkReaderContext readerContext = rawIndexReader.createContext(); + for (int docs = 0; docs < numDocs; docs++) { Review Comment: (nit) use `i` or `docId` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org