pvary commented on code in PR #12298: URL: https://github.com/apache/iceberg/pull/12298#discussion_r2001145882
########## core/src/main/java/org/apache/iceberg/io/datafile/DataFileServiceRegistry.java: ########## @@ -0,0 +1,450 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.io.datafile; + +import java.util.Map; +import java.util.ServiceLoader; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.Schema; +import org.apache.iceberg.encryption.EncryptedOutputFile; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Objects; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Registry which maintains the available {@link ReaderService} and {@link WriterService} + * implementations. Based on the `file format`, the required `data type` and the reader/writer + * `builderType` the registry returns the correct reader and writer service implementations. These + * services could be used to generate the correct reader and writer builders. 
+ */ +public final class DataFileServiceRegistry { + private static final Logger LOG = LoggerFactory.getLogger(DataFileServiceRegistry.class); + + private DataFileServiceRegistry() {} + + /** + * Provides a reader for the given input file which returns objects with a given returnType. + * + * @param format of the file to read + * @param returnType returned by the reader + * @param inputFile to read + * @param readSchema to use when reading the data file + * @return {@link ReaderBuilder} for building the actual reader + */ + public static ReaderBuilder readerBuilder( + FileFormat format, String returnType, InputFile inputFile, Schema readSchema) { + return readerBuilder(format, returnType, null, inputFile, readSchema, ImmutableMap.of(), null); + } + + /** + * Provides a reader for the given input file which returns objects with a given returnType. + * + * @param format of the file to read + * @param returnType returned by the reader + * @param inputFile to read + * @param readSchema to use when reading the data file + * @param idToConstant to use for getting value for constant fields + * @return {@link ReaderBuilder} for building the actual reader + */ + public static ReaderBuilder readerBuilder( + FileFormat format, + String returnType, + InputFile inputFile, + Schema readSchema, + Map<Integer, ?> idToConstant) { + return readerBuilder(format, returnType, null, inputFile, readSchema, idToConstant, null); + } + + /** + * Provides a reader for the given input file which returns objects with a given returnType. 
+ * + * @param format of the file to read + * @param returnType returned by the reader + * @param builderType selects the builder when there are multiple builders for the same format and + * return type + * @param inputFile to read + * @param readSchema to use when reading the data file + * @param idToConstant to use for getting value for constant fields + * @param deleteFilter is used when the delete record filtering is pushed down to the reader + * @return {@link ReaderBuilder} for building the actual reader + */ + public static ReaderBuilder readerBuilder( + FileFormat format, + String returnType, + String builderType, + InputFile inputFile, + Schema readSchema, + Map<Integer, ?> idToConstant, + DeleteFilter<?> deleteFilter) { + return Registry.readerBuilderFor(format, returnType, builderType) + .builder(inputFile, readSchema, idToConstant, deleteFilter); + } + + /** + * Provides an appender builder for the given input file which writes objects with a given + * inputType. + * + * @param format of the file to write + * @param inputType of the rows + * @param outputFile to write + * @param rowType of the native input data + * @return {@link AppenderBuilder} for building the actual appender + */ + public static <S> AppenderBuilder appenderBuilder( + FileFormat format, String inputType, EncryptedOutputFile outputFile, S rowType) { + return appenderBuilder(format, inputType, null, outputFile, rowType); + } + + /** + * Provides an appender builder for the given input file which writes objects with a given + * inputType. 
+ * + * @param format of the file to write + * @param inputType of the rows + * @param builderType selects the builder when there are multiple builders for the same format and + * input type + * @param outputFile to write + * @param rowType of the native input data + * @return {@link AppenderBuilder} for building the actual appender + */ + public static <S> AppenderBuilder appenderBuilder( + FileFormat format, + String inputType, + String builderType, + EncryptedOutputFile outputFile, + S rowType) { + return Registry.writeBuilderFor(format, inputType, builderType) + .appenderBuilder(outputFile, rowType); + } + + /** + * Provides a data writer builder for the given input file which writes objects with a given + * inputType. + * + * @param format of the file to write + * @param inputType of the rows + * @param outputFile to write + * @param rowType of the native input data + * @return {@link DataWriterBuilder} for building the actual writer + */ + public static <S> DataWriterBuilder dataWriterBuilder( + FileFormat format, String inputType, EncryptedOutputFile outputFile, S rowType) { + return dataWriterBuilder(format, inputType, null, outputFile, rowType); + } + + /** + * Provides a data writer builder for the given input file which writes objects with a given + * inputType. 
+ * + * @param format of the file to write + * @param inputType of the rows + * @param builderType selects the builder when there are multiple builders for the same format and + * input type + * @param outputFile to write + * @param rowType of the native input data + * @return {@link DataWriterBuilder} for building the actual writer + */ + public static <S> DataWriterBuilder dataWriterBuilder( + FileFormat format, + String inputType, + String builderType, + EncryptedOutputFile outputFile, + S rowType) { + return Registry.writeBuilderFor(format, inputType, builderType) + .dataWriterBuilder(outputFile, rowType); + } + + /** + * Provides an equality delete writer builder for the given input file which writes objects with a + * given inputType. + * + * @param format of the file to write + * @param inputType of the rows + * @param outputFile to write + * @param rowType of the native input data + * @return {@link AppenderBuilder} for building the actual writer + */ + public static <S, B extends EqualityDeleteWriterBuilder<B>> + EqualityDeleteWriterBuilder<B> equalityDeleteWriterBuilder( Review Comment: The appender doesn't need to know about these, but the file formats and the writer implementations need them. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org