KKcorps commented on code in PR #9294: URL: https://github.com/apache/pinot/pull/9294#discussion_r958078388
########## pinot-plugins/pinot-input-format/pinot-parquet/src/main/java/org/apache/pinot/plugin/inputformat/parquet/ParquetRecordReader.java: ########## @@ -32,17 +38,34 @@ * It has two implementations: {@link ParquetAvroRecordReader} (Default) and {@link ParquetNativeRecordReader}. */ public class ParquetRecordReader implements RecordReader { + private static final String AVRO_SCHEMA_METADATA_KEY = "parquet.avro.schema"; + private RecordReader _internalParquetRecordReader; private boolean _useAvroParquetRecordReader = true; @Override public void init(File dataFile, @Nullable Set<String> fieldsToRead, @Nullable RecordReaderConfig recordReaderConfig) throws IOException { - if (recordReaderConfig == null || ((ParquetRecordReaderConfig) recordReaderConfig).useParquetAvroRecordReader()) { + if (recordReaderConfig != null && ((ParquetRecordReaderConfig) recordReaderConfig).useParquetAvroRecordReader()) { _internalParquetRecordReader = new ParquetAvroRecordReader(); - } else { + } else if (recordReaderConfig != null + && ((ParquetRecordReaderConfig) recordReaderConfig).useParquetNativeRecordReader()) { _useAvroParquetRecordReader = false; _internalParquetRecordReader = new ParquetNativeRecordReader(); + } else { + // No reader type specified. Determine using file metadata + ParquetMetadata parquetMetadata = + ParquetFileReader.readFooter(new Configuration(), new Path(dataFile.getAbsolutePath()), + ParquetMetadataConverter.NO_FILTER); + Map<String, String> fileKeyValueMeta = parquetMetadata.getFileMetaData().getKeyValueMetaData(); + boolean useParquetAvroRecordReader = fileKeyValueMeta.containsKey(AVRO_SCHEMA_METADATA_KEY); Review Comment: This line might throw a `NullPointerException` if `fileKeyValueMeta` is null, because `containsKey` is called on it without a null check. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org