Fokko commented on code in PR #7519:
URL: https://github.com/apache/iceberg/pull/7519#discussion_r1185082289
##########
python/pyiceberg/catalog/rest.py:
##########
@@ -184,37 +185,46 @@ def __init__(self, name: str, **properties: str):
name: Name to identify the catalog
properties: Properties that are passed along to the configuration
"""
- self.properties = properties
+ super().__init__(name, **properties)
self.uri = properties[URI]
+ self._session: Session = Session()
Review Comment:
This part was already messy, but I don't think this makes it much better. I
would suggest the following:
Have `_create_session()` return a session instead of setting it in the
function itself, so we know when `_session` is set:
```python
def _create_session(self) -> Session:
"""Creates a request session with provided catalog configuration"""
session = Session()
# Sets the client side and server side SSL cert verification, if
provided as properties.
if ssl_config := self.properties.get(SSL):
if ssl_ca_bundle := ssl_config.get(CA_BUNDLE): # type: ignore
session.verify = ssl_ca_bundle
if ssl_client := ssl_config.get(CLIENT): # type: ignore
if all(k in ssl_client for k in (CERT, KEY)):
session.cert = (ssl_client[CERT], ssl_client[KEY])
elif ssl_client_cert := ssl_client.get(CERT):
session.cert = ssl_client_cert
# If we have credentials, but not a token, we want to fetch a token
if TOKEN not in self.properties and (credential :=
self.properties.get(CREDENTIAL)):
if SEMICOLON in credential:
client_id, client_secret = credential.split(SEMICOLON)
else:
client_id, client_secret = None, credential
data = {GRANT_TYPE: CLIENT_CREDENTIALS, CLIENT_ID: client_id,
CLIENT_SECRET: client_secret,
SCOPE: CATALOG_SCOPE}
url = self.url(Endpoints.get_token, prefixed=False)
# Uses application/x-www-form-urlencoded by default
response = session.post(url=url, data=data)
try:
response.raise_for_status()
except HTTPError as exc:
self._handle_non_200_response(exc, {400: OAuthError, 401:
OAuthError})
self.properties[TOKEN] =
TokenResponse(**response.json()).access_token
# Set Auth token for subsequent calls in the session
if token := self.properties.get(TOKEN):
session.headers[AUTHORIZATION_HEADER] = f"{BEARER_PREFIX}
{token}"
# Set HTTP headers
session.headers["Content-type"] = "application/json"
session.headers["X-Client-Version"] = ICEBERG_REST_SPEC_VERSION
session.headers["User-Agent"] = f"PyIceberg/{__version__}"
# Configure SigV4 Request Signing
if str(self.properties.get(SIGV4, False)).lower() == "true":
self._init_sigv4()
return session
```
This includes merging `_fetch_access_token` into the method to avoid using
the `self._session`.
- Have a temporary session in `_fetch_config` that we immediately discard
again:
```python
def _fetch_config(self) -> None:
params = {}
if warehouse_location := self.properties.get(WAREHOUSE_LOCATION):
params[WAREHOUSE_LOCATION] = warehouse_location
with self._create_session() as session:
response = session.get(self.url(Endpoints.get_config,
prefixed=False), params=params)
try:
response.raise_for_status()
except HTTPError as exc:
self._handle_non_200_response(exc, {})
config_response = ConfigResponse(**response.json())
config = config_response.defaults
config.update(self.properties)
config.update(config_response.overrides)
self.properties = config
# Update URI based on overrides
self.uri = config[URI]
```
Now we can simplify the constructor:
```python
def __init__(self, name: str, **properties: str):
"""Rest Catalog
You either need to provide a client_id and client_secret, or an
already valid token.
Args:
name: Name to identify the catalog
properties: Properties that are passed along to the configuration
"""
super().__init__(name, **properties)
self.uri = properties[URI]
self._fetch_config()
self._session = self._create_session()
```
I've tested this and works when doing vended credentials.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]