singhpk234 commented on code in PR #9884:
URL: https://github.com/apache/iceberg/pull/9884#discussion_r1514994609


##########
core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java:
##########
@@ -53,20 +53,22 @@ public class ResolvingFileIO implements HadoopConfigurable, 
DelegateFileIO {
   private static final String S3_FILE_IO_IMPL = 
"org.apache.iceberg.aws.s3.S3FileIO";
   private static final String GCS_FILE_IO_IMPL = 
"org.apache.iceberg.gcp.gcs.GCSFileIO";
   private static final String ADLS_FILE_IO_IMPL = 
"org.apache.iceberg.azure.adlsv2.ADLSFileIO";
-  private static final Map<String, String> SCHEME_TO_FILE_IO =
+  private static final Map<String, String> DEFAULT_SCHEME_TO_FILE_IO =
       ImmutableMap.of(
           "s3", S3_FILE_IO_IMPL,
           "s3a", S3_FILE_IO_IMPL,
           "s3n", S3_FILE_IO_IMPL,
           "gs", GCS_FILE_IO_IMPL,
           "abfs", ADLS_FILE_IO_IMPL,
           "abfss", ADLS_FILE_IO_IMPL);
+  private static final String SCHEME_PROPERTY_PREFIX = "resolving-io.schemes.";
 
   private final Map<String, DelegateFileIO> ioInstances = 
Maps.newConcurrentMap();
   private final AtomicBoolean isClosed = new AtomicBoolean(false);
   private final transient StackTraceElement[] createStack;
   private SerializableMap<String, String> properties;
   private SerializableSupplier<Configuration> hadoopConf;
+  private final Map<String, String> schemeToFileIo = 
Maps.newHashMap(DEFAULT_SCHEME_TO_FILE_IO);

Review Comment:
   ```suggestion
     private final Map<String, String> schemeToFileIO = 
Maps.newHashMap(DEFAULT_SCHEME_TO_FILE_IO);
   ```



##########
core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java:
##########
@@ -122,11 +124,20 @@ public void initialize(Map<String, String> newProperties) 
{
     close(); // close and discard any existing FileIO instances
     this.properties = SerializableMap.copyOf(newProperties);
     isClosed.set(false);
+    // load custom IO scheme handlers (starting with resolving-io.schemes. 
prefix)
+    for (String key : this.properties.keySet()) {
+      if (key.startsWith(SCHEME_PROPERTY_PREFIX)) {
+        this.schemeToFileIo.put(
+            key.substring(SCHEME_PROPERTY_PREFIX.length()), 
this.properties.get(key));
+      }
+    }

Review Comment:
   We should use 
   
https://github.com/apache/iceberg/blob/f0b3733e0e7071894bdf63133bcc8c00754a1080/core/src/main/java/org/apache/iceberg/util/PropertyUtil.java#L111



##########
core/src/test/java/org/apache/iceberg/io/TestResolvingIO.java:
##########
@@ -182,4 +182,16 @@ public void delegateFileIOWithAndWithoutMixins() {
     // being null is ok here as long as the code doesn't throw an exception
     assertThat(resolvingFileIO.newInputFile("/file")).isNull();
   }
+
+  @Test
+  public void customFileIOScheme() {

Review Comment:
   should we add an UT for s3 override with Custom File IO impl ? essentially 
testing that hashmap of props are loaded post the default initialization.



##########
core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java:
##########
@@ -53,20 +53,22 @@ public class ResolvingFileIO implements HadoopConfigurable, 
DelegateFileIO {
   private static final String S3_FILE_IO_IMPL = 
"org.apache.iceberg.aws.s3.S3FileIO";
   private static final String GCS_FILE_IO_IMPL = 
"org.apache.iceberg.gcp.gcs.GCSFileIO";
   private static final String ADLS_FILE_IO_IMPL = 
"org.apache.iceberg.azure.adlsv2.ADLSFileIO";
-  private static final Map<String, String> SCHEME_TO_FILE_IO =
+  private static final Map<String, String> DEFAULT_SCHEME_TO_FILE_IO =
       ImmutableMap.of(
           "s3", S3_FILE_IO_IMPL,
           "s3a", S3_FILE_IO_IMPL,
           "s3n", S3_FILE_IO_IMPL,
           "gs", GCS_FILE_IO_IMPL,
           "abfs", ADLS_FILE_IO_IMPL,
           "abfss", ADLS_FILE_IO_IMPL);
+  private static final String SCHEME_PROPERTY_PREFIX = "resolving-io.schemes.";
 
   private final Map<String, DelegateFileIO> ioInstances = 
Maps.newConcurrentMap();
   private final AtomicBoolean isClosed = new AtomicBoolean(false);
   private final transient StackTraceElement[] createStack;
   private SerializableMap<String, String> properties;
   private SerializableSupplier<Configuration> hadoopConf;
+  private final Map<String, String> schemeToFileIo = Maps.newConcurrentMap();

Review Comment:
   [doubt] why do we need a concurrentMap here ? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to