rustyconover commented on code in PR #8304:
URL: https://github.com/apache/iceberg/pull/8304#discussion_r1315218328
##########
python/pyiceberg/catalog/glue.py:
##########
@@ -56,124 +64,109 @@
from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
from pyiceberg.typedef import EMPTY_DICT
-BOTO_SESSION_CONFIG_KEYS = ["aws_access_key_id", "aws_secret_access_key", "aws_session_token", "region_name", "profile_name"]
-
-GLUE_CLIENT = "glue"
-
-
-PROP_GLUE_TABLE = "Table"
-PROP_GLUE_TABLE_TYPE = "TableType"
-PROP_GLUE_TABLE_DESCRIPTION = "Description"
-PROP_GLUE_TABLE_PARAMETERS = "Parameters"
-PROP_GLUE_TABLE_DATABASE_NAME = "DatabaseName"
-PROP_GLUE_TABLE_NAME = "Name"
-PROP_GLUE_TABLE_OWNER = "Owner"
-PROP_GLUE_TABLE_STORAGE_DESCRIPTOR = "StorageDescriptor"
-
-PROP_GLUE_TABLELIST = "TableList"
-
-PROP_GLUE_DATABASE = "Database"
-PROP_GLUE_DATABASE_LIST = "DatabaseList"
-PROP_GLUE_DATABASE_NAME = "Name"
-PROP_GLUE_DATABASE_LOCATION = "LocationUri"
-PROP_GLUE_DATABASE_DESCRIPTION = "Description"
-PROP_GLUE_DATABASE_PARAMETERS = "Parameters"
-
-PROP_GLUE_NEXT_TOKEN = "NextToken"
-
-GLUE_DESCRIPTION_KEY = "comment"
-
def _construct_parameters(metadata_location: str) -> Properties:
return {TABLE_TYPE: ICEBERG.upper(), METADATA_LOCATION: metadata_location}
-def _construct_create_table_input(table_name: str, metadata_location: str, properties: Properties) -> Dict[str, Any]:
- table_input = {
- PROP_GLUE_TABLE_NAME: table_name,
- PROP_GLUE_TABLE_TYPE: EXTERNAL_TABLE,
- PROP_GLUE_TABLE_PARAMETERS: _construct_parameters(metadata_location),
+def _construct_create_table_input(table_name: str, metadata_location: str, properties: Properties) -> TableInputTypeDef:
+ table_input: TableInputTypeDef = {
+ "Name": table_name,
+ "TableType": EXTERNAL_TABLE,
+ "Parameters": _construct_parameters(metadata_location),
}
- if table_description := properties.get(GLUE_DESCRIPTION_KEY):
- table_input[PROP_GLUE_TABLE_DESCRIPTION] = table_description
+ if "Description" in properties:
+ table_input["Description"] = properties["Description"]
return table_input
-def _construct_rename_table_input(to_table_name: str, glue_table: Dict[str, Any]) -> Dict[str, Any]:
- rename_table_input = {PROP_GLUE_TABLE_NAME: to_table_name}
+def _construct_rename_table_input(to_table_name: str, glue_table: TableTypeDef) -> TableInputTypeDef:
+ rename_table_input: TableInputTypeDef = {"Name": to_table_name}
# use the same Glue info to create the new table, pointing to the old metadata
- if table_type := glue_table.get(PROP_GLUE_TABLE_TYPE):
- rename_table_input[PROP_GLUE_TABLE_TYPE] = table_type
- if table_parameters := glue_table.get(PROP_GLUE_TABLE_PARAMETERS):
- rename_table_input[PROP_GLUE_TABLE_PARAMETERS] = table_parameters
- if table_owner := glue_table.get(PROP_GLUE_TABLE_OWNER):
- rename_table_input[PROP_GLUE_TABLE_OWNER] = table_owner
- if table_storage_descriptor := glue_table.get(PROP_GLUE_TABLE_STORAGE_DESCRIPTOR):
- rename_table_input[PROP_GLUE_TABLE_STORAGE_DESCRIPTOR] = table_storage_descriptor
- if table_description := glue_table.get(PROP_GLUE_TABLE_DESCRIPTION):
- rename_table_input[PROP_GLUE_TABLE_DESCRIPTION] = table_description
+ rename_table_input["TableType"] = glue_table["TableType"]
+ if "Owner" in glue_table:
+ rename_table_input["Owner"] = glue_table["Owner"]
+
+ if "Parameters" in glue_table:
+ rename_table_input["Parameters"] = glue_table["Parameters"]
+
+ if "StorageDescriptor" in glue_table:
+ # It turns out the output of StorageDescriptor is not the same as the input type
+ # because the Column can have a different type, but for now it seems to work, so
+ # silence the type error.
+ rename_table_input["StorageDescriptor"] = cast(StorageDescriptorTypeDef, glue_table["StorageDescriptor"])
+
+ if "Description" in glue_table:
+ rename_table_input["Description"] = glue_table["Description"]
+
return rename_table_input
-def _construct_database_input(database_name: str, properties: Properties) -> Dict[str, Any]:
- database_input: Dict[str, Any] = {PROP_GLUE_DATABASE_NAME: database_name}
+def _construct_database_input(database_name: str, properties: Properties) -> DatabaseInputTypeDef:
+ database_input: DatabaseInputTypeDef = {"Name": database_name}
parameters = {}
for k, v in properties.items():
- if k == GLUE_DESCRIPTION_KEY:
- database_input[PROP_GLUE_DATABASE_DESCRIPTION] = v
+ if k == "Description":
+ database_input["Description"] = v
elif k == LOCATION:
- database_input[PROP_GLUE_DATABASE_LOCATION] = v
+ database_input["LocationUri"] = v
else:
parameters[k] = v
- database_input[PROP_GLUE_DATABASE_PARAMETERS] = parameters
+ database_input["Parameters"] = parameters
return database_input
class GlueCatalog(Catalog):
- def __init__(self, name: str, **properties: str):
+ def __init__(self, name: str, **properties: Any):
super().__init__(name, **properties)
- session_config = {k: v for k, v in properties.items() if k in BOTO_SESSION_CONFIG_KEYS}
- session = boto3.Session(**session_config)
- self.glue = session.client(GLUE_CLIENT)
+ session = boto3.Session(
+ profile_name=properties.get("profile_name"),
+ region_name=properties.get("region_name"),
+ botocore_session=properties.get("botocore_session"),
+ aws_access_key_id=properties.get("aws_access_key_id"),
+ aws_secret_access_key=properties.get("aws_secret_access_key"),
+ aws_session_token=properties.get("aws_session_token"),
+ )
+ self.glue: GlueClient = session.client("glue")
+
+ def _convert_glue_to_iceberg(self, glue_table: TableTypeDef) -> Table:
+ properties: Properties = glue_table["Parameters"]
- def _convert_glue_to_iceberg(self, glue_table: Dict[str, Any]) -> Table:
- properties: Properties = glue_table.get(PROP_GLUE_TABLE_PARAMETERS, {})
+ database_name = glue_table["DatabaseName"]
+ table_name = glue_table["Name"]
if TABLE_TYPE not in properties:
raise NoSuchPropertyException(
- f"Property {TABLE_TYPE} missing, could not determine type: "
- f"{glue_table[PROP_GLUE_TABLE_DATABASE_NAME]}.{glue_table[PROP_GLUE_TABLE_NAME]}"
+ f"Property {TABLE_TYPE} missing, could not determine type: " f"{database_name}.{table_name}"
Review Comment:
Fixed.
##########
python/pyiceberg/catalog/glue.py:
##########
@@ -56,124 +64,109 @@
from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
from pyiceberg.typedef import EMPTY_DICT
-BOTO_SESSION_CONFIG_KEYS = ["aws_access_key_id", "aws_secret_access_key", "aws_session_token", "region_name", "profile_name"]
-
-GLUE_CLIENT = "glue"
-
-
-PROP_GLUE_TABLE = "Table"
-PROP_GLUE_TABLE_TYPE = "TableType"
-PROP_GLUE_TABLE_DESCRIPTION = "Description"
-PROP_GLUE_TABLE_PARAMETERS = "Parameters"
-PROP_GLUE_TABLE_DATABASE_NAME = "DatabaseName"
-PROP_GLUE_TABLE_NAME = "Name"
-PROP_GLUE_TABLE_OWNER = "Owner"
-PROP_GLUE_TABLE_STORAGE_DESCRIPTOR = "StorageDescriptor"
-
-PROP_GLUE_TABLELIST = "TableList"
-
-PROP_GLUE_DATABASE = "Database"
-PROP_GLUE_DATABASE_LIST = "DatabaseList"
-PROP_GLUE_DATABASE_NAME = "Name"
-PROP_GLUE_DATABASE_LOCATION = "LocationUri"
-PROP_GLUE_DATABASE_DESCRIPTION = "Description"
-PROP_GLUE_DATABASE_PARAMETERS = "Parameters"
-
-PROP_GLUE_NEXT_TOKEN = "NextToken"
-
-GLUE_DESCRIPTION_KEY = "comment"
-
def _construct_parameters(metadata_location: str) -> Properties:
return {TABLE_TYPE: ICEBERG.upper(), METADATA_LOCATION: metadata_location}
-def _construct_create_table_input(table_name: str, metadata_location: str, properties: Properties) -> Dict[str, Any]:
- table_input = {
- PROP_GLUE_TABLE_NAME: table_name,
- PROP_GLUE_TABLE_TYPE: EXTERNAL_TABLE,
- PROP_GLUE_TABLE_PARAMETERS: _construct_parameters(metadata_location),
+def _construct_create_table_input(table_name: str, metadata_location: str, properties: Properties) -> TableInputTypeDef:
+ table_input: TableInputTypeDef = {
+ "Name": table_name,
+ "TableType": EXTERNAL_TABLE,
+ "Parameters": _construct_parameters(metadata_location),
}
- if table_description := properties.get(GLUE_DESCRIPTION_KEY):
- table_input[PROP_GLUE_TABLE_DESCRIPTION] = table_description
+ if "Description" in properties:
+ table_input["Description"] = properties["Description"]
return table_input
-def _construct_rename_table_input(to_table_name: str, glue_table: Dict[str, Any]) -> Dict[str, Any]:
- rename_table_input = {PROP_GLUE_TABLE_NAME: to_table_name}
+def _construct_rename_table_input(to_table_name: str, glue_table: TableTypeDef) -> TableInputTypeDef:
+ rename_table_input: TableInputTypeDef = {"Name": to_table_name}
# use the same Glue info to create the new table, pointing to the old metadata
- if table_type := glue_table.get(PROP_GLUE_TABLE_TYPE):
- rename_table_input[PROP_GLUE_TABLE_TYPE] = table_type
- if table_parameters := glue_table.get(PROP_GLUE_TABLE_PARAMETERS):
- rename_table_input[PROP_GLUE_TABLE_PARAMETERS] = table_parameters
- if table_owner := glue_table.get(PROP_GLUE_TABLE_OWNER):
- rename_table_input[PROP_GLUE_TABLE_OWNER] = table_owner
- if table_storage_descriptor := glue_table.get(PROP_GLUE_TABLE_STORAGE_DESCRIPTOR):
- rename_table_input[PROP_GLUE_TABLE_STORAGE_DESCRIPTOR] = table_storage_descriptor
- if table_description := glue_table.get(PROP_GLUE_TABLE_DESCRIPTION):
- rename_table_input[PROP_GLUE_TABLE_DESCRIPTION] = table_description
+ rename_table_input["TableType"] = glue_table["TableType"]
+ if "Owner" in glue_table:
+ rename_table_input["Owner"] = glue_table["Owner"]
+
+ if "Parameters" in glue_table:
+ rename_table_input["Parameters"] = glue_table["Parameters"]
+
+ if "StorageDescriptor" in glue_table:
+ # It turns out the output of StorageDescriptor is not the same as the input type
+ # because the Column can have a different type, but for now it seems to work, so
+ # silence the type error.
+ rename_table_input["StorageDescriptor"] = cast(StorageDescriptorTypeDef, glue_table["StorageDescriptor"])
+
+ if "Description" in glue_table:
+ rename_table_input["Description"] = glue_table["Description"]
+
return rename_table_input
-def _construct_database_input(database_name: str, properties: Properties) -> Dict[str, Any]:
- database_input: Dict[str, Any] = {PROP_GLUE_DATABASE_NAME: database_name}
+def _construct_database_input(database_name: str, properties: Properties) -> DatabaseInputTypeDef:
+ database_input: DatabaseInputTypeDef = {"Name": database_name}
parameters = {}
for k, v in properties.items():
- if k == GLUE_DESCRIPTION_KEY:
- database_input[PROP_GLUE_DATABASE_DESCRIPTION] = v
+ if k == "Description":
+ database_input["Description"] = v
elif k == LOCATION:
- database_input[PROP_GLUE_DATABASE_LOCATION] = v
+ database_input["LocationUri"] = v
else:
parameters[k] = v
- database_input[PROP_GLUE_DATABASE_PARAMETERS] = parameters
+ database_input["Parameters"] = parameters
return database_input
class GlueCatalog(Catalog):
- def __init__(self, name: str, **properties: str):
+ def __init__(self, name: str, **properties: Any):
super().__init__(name, **properties)
- session_config = {k: v for k, v in properties.items() if k in BOTO_SESSION_CONFIG_KEYS}
- session = boto3.Session(**session_config)
- self.glue = session.client(GLUE_CLIENT)
+ session = boto3.Session(
+ profile_name=properties.get("profile_name"),
+ region_name=properties.get("region_name"),
+ botocore_session=properties.get("botocore_session"),
+ aws_access_key_id=properties.get("aws_access_key_id"),
+ aws_secret_access_key=properties.get("aws_secret_access_key"),
+ aws_session_token=properties.get("aws_session_token"),
+ )
+ self.glue: GlueClient = session.client("glue")
+
+ def _convert_glue_to_iceberg(self, glue_table: TableTypeDef) -> Table:
+ properties: Properties = glue_table["Parameters"]
- def _convert_glue_to_iceberg(self, glue_table: Dict[str, Any]) -> Table:
- properties: Properties = glue_table.get(PROP_GLUE_TABLE_PARAMETERS, {})
+ database_name = glue_table["DatabaseName"]
+ table_name = glue_table["Name"]
if TABLE_TYPE not in properties:
raise NoSuchPropertyException(
- f"Property {TABLE_TYPE} missing, could not determine type: "
- f"{glue_table[PROP_GLUE_TABLE_DATABASE_NAME]}.{glue_table[PROP_GLUE_TABLE_NAME]}"
+ f"Property {TABLE_TYPE} missing, could not determine type: " f"{database_name}.{table_name}"
)
glue_table_type = properties[TABLE_TYPE]
if glue_table_type.lower() != ICEBERG:
raise NoSuchIcebergTableError(
- f"Property table_type is {glue_table_type}, expected {ICEBERG}: "
- f"{glue_table[PROP_GLUE_TABLE_DATABASE_NAME]}.{glue_table[PROP_GLUE_TABLE_NAME]}"
+ f"Property table_type is {glue_table_type}, expected {ICEBERG}: " f"{database_name}.{table_name}"
)
if METADATA_LOCATION not in properties:
raise NoSuchPropertyException(
- f"Table property {METADATA_LOCATION} is missing, cannot find metadata for: "
- f"{glue_table[PROP_GLUE_TABLE_DATABASE_NAME]}.{glue_table[PROP_GLUE_TABLE_NAME]}"
+ f"Table property {METADATA_LOCATION} is missing, cannot find metadata for: " f"{database_name}.{table_name}"
Review Comment:
Fixed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]