singhpk234 commented on code in PR #9884: URL: https://github.com/apache/iceberg/pull/9884#discussion_r1514994609
########## core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java: ########## @@ -53,20 +53,22 @@ public class ResolvingFileIO implements HadoopConfigurable, DelegateFileIO { private static final String S3_FILE_IO_IMPL = "org.apache.iceberg.aws.s3.S3FileIO"; private static final String GCS_FILE_IO_IMPL = "org.apache.iceberg.gcp.gcs.GCSFileIO"; private static final String ADLS_FILE_IO_IMPL = "org.apache.iceberg.azure.adlsv2.ADLSFileIO"; - private static final Map<String, String> SCHEME_TO_FILE_IO = + private static final Map<String, String> DEFAULT_SCHEME_TO_FILE_IO = ImmutableMap.of( "s3", S3_FILE_IO_IMPL, "s3a", S3_FILE_IO_IMPL, "s3n", S3_FILE_IO_IMPL, "gs", GCS_FILE_IO_IMPL, "abfs", ADLS_FILE_IO_IMPL, "abfss", ADLS_FILE_IO_IMPL); + private static final String SCHEME_PROPERTY_PREFIX = "resolving-io.schemes."; private final Map<String, DelegateFileIO> ioInstances = Maps.newConcurrentMap(); private final AtomicBoolean isClosed = new AtomicBoolean(false); private final transient StackTraceElement[] createStack; private SerializableMap<String, String> properties; private SerializableSupplier<Configuration> hadoopConf; + private final Map<String, String> schemeToFileIo = Maps.newHashMap(DEFAULT_SCHEME_TO_FILE_IO); Review Comment: ```suggestion private final Map<String, String> schemeToFileIO = Maps.newHashMap(DEFAULT_SCHEME_TO_FILE_IO); ``` ########## core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java: ########## @@ -122,11 +124,20 @@ public void initialize(Map<String, String> newProperties) { close(); // close and discard any existing FileIO instances this.properties = SerializableMap.copyOf(newProperties); isClosed.set(false); + // load custom IO scheme handlers (starting with resolving-io.schemes. prefix) + for (String key : this.properties.keySet()) { + if (key.startsWith(SCHEME_PROPERTY_PREFIX)) { + this.schemeToFileIo.put( + key.substring(SCHEME_PROPERTY_PREFIX.length()), this.properties.get(key)); + } + } Review Comment: We should use https://github.com/apache/iceberg/blob/f0b3733e0e7071894bdf63133bcc8c00754a1080/core/src/main/java/org/apache/iceberg/util/PropertyUtil.java#L111 ########## core/src/test/java/org/apache/iceberg/io/TestResolvingIO.java: ########## @@ -182,4 +182,16 @@ public void delegateFileIOWithAndWithoutMixins() { // being null is ok here as long as the code doesn't throw an exception assertThat(resolvingFileIO.newInputFile("/file")).isNull(); } + + @Test + public void customFileIOScheme() { Review Comment: should we add an UT for s3 override with Custom File IO impl ? essentially testing that hashmap of props are loaded post the default initialization. ########## core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java: ########## @@ -53,20 +53,22 @@ public class ResolvingFileIO implements HadoopConfigurable, DelegateFileIO { private static final String S3_FILE_IO_IMPL = "org.apache.iceberg.aws.s3.S3FileIO"; private static final String GCS_FILE_IO_IMPL = "org.apache.iceberg.gcp.gcs.GCSFileIO"; private static final String ADLS_FILE_IO_IMPL = "org.apache.iceberg.azure.adlsv2.ADLSFileIO"; - private static final Map<String, String> SCHEME_TO_FILE_IO = + private static final Map<String, String> DEFAULT_SCHEME_TO_FILE_IO = ImmutableMap.of( "s3", S3_FILE_IO_IMPL, "s3a", S3_FILE_IO_IMPL, "s3n", S3_FILE_IO_IMPL, "gs", GCS_FILE_IO_IMPL, "abfs", ADLS_FILE_IO_IMPL, "abfss", ADLS_FILE_IO_IMPL); + private static final String SCHEME_PROPERTY_PREFIX = "resolving-io.schemes."; private final Map<String, DelegateFileIO> ioInstances = Maps.newConcurrentMap(); private final AtomicBoolean isClosed = new AtomicBoolean(false); private final transient StackTraceElement[] createStack; private SerializableMap<String, String> properties; private SerializableSupplier<Configuration> hadoopConf; + private final Map<String, String> schemeToFileIo = Maps.newConcurrentMap(); Review Comment: [doubt] why do we need a concurrentMap here ? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org