rchowell commented on code in PR #1920:
URL: https://github.com/apache/iceberg-python/pull/1920#discussion_r2052639531


##########
pyiceberg/catalog/glue.py:
##########
@@ -303,32 +303,46 @@ def add_glue_catalog_id(params: Dict[str, str], **kwargs: 
Any) -> None:
 
 
 class GlueCatalog(MetastoreCatalog):
-    def __init__(self, name: str, **properties: Any):
-        super().__init__(name, **properties)
+    glue: GlueClient
 
-        retry_mode_prop_value = get_first_property_value(properties, 
GLUE_RETRY_MODE)
+    def __init__(self, name: str, client: Optional[GlueClient] = None, 
**properties: Any):
+        """Glue Catalog.
 
-        session = boto3.Session(
-            profile_name=properties.get(GLUE_PROFILE_NAME),
-            region_name=get_first_property_value(properties, GLUE_REGION, 
AWS_REGION),
-            botocore_session=properties.get(BOTOCORE_SESSION),
-            aws_access_key_id=get_first_property_value(properties, 
GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
-            aws_secret_access_key=get_first_property_value(properties, 
GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
-            aws_session_token=get_first_property_value(properties, 
GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN),
-        )
-        self.glue: GlueClient = session.client(
-            "glue",
-            endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT),
-            config=Config(
-                retries={
-                    "max_attempts": properties.get(GLUE_MAX_RETRIES, 
MAX_RETRIES),
-                    "mode": retry_mode_prop_value if retry_mode_prop_value in 
EXISTING_RETRY_MODES else STANDARD_RETRY_MODE,
-                }
-            ),
-        )
+        You either need to provide a boto3 glue client, or one will be 
constructed from the properties.
+
+        Args:
+            name: Name to identify the catalog.
+            client: An optional boto3 glue client.
+            properties: Properties for glue client construction and 
configuration.
+        """
+        super().__init__(name, **properties)
+
+        if client:
+            self.glue = client
+        else:
+            retry_mode_prop_value = get_first_property_value(properties, 
GLUE_RETRY_MODE)
+
+            session = boto3.Session(
+                profile_name=properties.get(GLUE_PROFILE_NAME),
+                region_name=get_first_property_value(properties, GLUE_REGION, 
AWS_REGION),
+                botocore_session=properties.get(BOTOCORE_SESSION),
+                aws_access_key_id=get_first_property_value(properties, 
GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
+                aws_secret_access_key=get_first_property_value(properties, 
GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
+                aws_session_token=get_first_property_value(properties, 
GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN),
+            )
+            self.glue: GlueClient = session.client(
+                "glue",
+                endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT),
+                config=Config(
+                    retries={
+                        "max_attempts": properties.get(GLUE_MAX_RETRIES, 
MAX_RETRIES),
+                        "mode": retry_mode_prop_value if retry_mode_prop_value 
in EXISTING_RETRY_MODES else STANDARD_RETRY_MODE,
+                    }
+                ),
+            )
 
-        if glue_catalog_id := properties.get(GLUE_ID):
-            _register_glue_catalog_id_with_glue_client(self.glue, 
glue_catalog_id)
+            if glue_catalog_id := properties.get(GLUE_ID):
+                _register_glue_catalog_id_with_glue_client(self.glue, 
glue_catalog_id)

Review Comment:
   @kevinjqliu good question, I left this out intentionally so that we do not 
make any modifications to the customer's input client / impede on their 
existing events. Since the catalog_id parameter is optional, it doesn't make a 
functional difference afaik. Something to consider is using the [unique_id 
arg](https://github.com/boto/botocore/blob/aaa6690e45c8dabcde3a8d2d1aa34b5fd399fba7/botocore/hooks.py#L89)
 when registering an event. Let me know what you think, and I can follow-up 👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to