rchowell commented on code in PR #1920: URL: https://github.com/apache/iceberg-python/pull/1920#discussion_r2052639531
########## pyiceberg/catalog/glue.py: ########## @@ -303,32 +303,46 @@ def add_glue_catalog_id(params: Dict[str, str], **kwargs: Any) -> None: class GlueCatalog(MetastoreCatalog): - def __init__(self, name: str, **properties: Any): - super().__init__(name, **properties) + glue: GlueClient - retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE) + def __init__(self, name: str, client: Optional[GlueClient] = None, **properties: Any): + """Glue Catalog. - session = boto3.Session( - profile_name=properties.get(GLUE_PROFILE_NAME), - region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION), - botocore_session=properties.get(BOTOCORE_SESSION), - aws_access_key_id=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), - aws_secret_access_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), - aws_session_token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN), - ) - self.glue: GlueClient = session.client( - "glue", - endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT), - config=Config( - retries={ - "max_attempts": properties.get(GLUE_MAX_RETRIES, MAX_RETRIES), - "mode": retry_mode_prop_value if retry_mode_prop_value in EXISTING_RETRY_MODES else STANDARD_RETRY_MODE, - } - ), - ) + You either need to provide a boto3 glue client, or one will be constructed from the properties. + + Args: + name: Name to identify the catalog. + client: An optional boto3 glue client. + properties: Properties for glue client construction and configuration. + """ + super().__init__(name, **properties) + + if client: + self.glue = client + else: + retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE) + + session = boto3.Session( + profile_name=properties.get(GLUE_PROFILE_NAME), + region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION), + botocore_session=properties.get(BOTOCORE_SESSION), + aws_access_key_id=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), + aws_secret_access_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), + aws_session_token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN), + ) + self.glue: GlueClient = session.client( + "glue", + endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT), + config=Config( + retries={ + "max_attempts": properties.get(GLUE_MAX_RETRIES, MAX_RETRIES), + "mode": retry_mode_prop_value if retry_mode_prop_value in EXISTING_RETRY_MODES else STANDARD_RETRY_MODE, + } + ), + ) - if glue_catalog_id := properties.get(GLUE_ID): - _register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id) + if glue_catalog_id := properties.get(GLUE_ID): + _register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id) Review Comment: @kevinjqliu good question, I left this out intentionally so that we do not make any modifications to the customer's input client / impede on their existing events. Since the catalog_id parameter is optional, it doesn't make a functional difference afaik. Something to consider is using the [unique_id arg](https://github.com/boto/botocore/blob/aaa6690e45c8dabcde3a8d2d1aa34b5fd399fba7/botocore/hooks.py#L89) when registering an event. Let me know what you think, and I can follow-up 👍 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org