This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new ea8519c055 Avoid importing pandas and numpy in runtime and module
level (#33483)
ea8519c055 is described below
commit ea8519c0554d16b13d330a686f8479fc10cc58f2
Author: Hussein Awala <[email protected]>
AuthorDate: Fri Aug 18 07:45:45 2023 +0200
Avoid importing pandas and numpy in runtime and module level (#33483)
* Avoid importing pandas and numpy at runtime, and import them in the
methods that use them instead of at the module level
* fix salesforce tests
---
airflow/providers/apache/hive/hooks/hive.py | 15 +++++++++------
airflow/providers/exasol/hooks/exasol.py | 6 ++++--
airflow/providers/google/cloud/hooks/bigquery.py | 6 ++++--
airflow/providers/influxdb/hooks/influxdb.py | 6 +++++-
airflow/providers/oracle/hooks/oracle.py | 11 +++++------
airflow/providers/salesforce/hooks/salesforce.py | 10 ++++++++--
airflow/providers/slack/transfers/sql_to_slack.py | 3 ++-
tests/providers/salesforce/hooks/test_salesforce.py | 10 +++++-----
tests/serialization/serializers/test_serializers.py | 3 ++-
9 files changed, 44 insertions(+), 26 deletions(-)
diff --git a/airflow/providers/apache/hive/hooks/hive.py
b/airflow/providers/apache/hive/hooks/hive.py
index 3282d45aca..579918b470 100644
--- a/airflow/providers/apache/hive/hooks/hive.py
+++ b/airflow/providers/apache/hive/hooks/hive.py
@@ -26,16 +26,12 @@ import time
import warnings
from collections import OrderedDict
from tempfile import NamedTemporaryFile, TemporaryDirectory
-from typing import Any, Iterable, Mapping
+from typing import TYPE_CHECKING, Any, Iterable, Mapping
from airflow.exceptions import AirflowProviderDeprecationWarning
-try:
+if TYPE_CHECKING:
import pandas as pd
-except ImportError as e:
- from airflow.exceptions import AirflowOptionalProviderFeatureException
-
- raise AirflowOptionalProviderFeatureException(e)
import csv
@@ -1055,6 +1051,13 @@ class HiveServer2Hook(DbApiHook):
:return: pandas.DataFrame
"""
+ try:
+ import pandas as pd
+ except ImportError as e:
+ from airflow.exceptions import
AirflowOptionalProviderFeatureException
+
+ raise AirflowOptionalProviderFeatureException(e)
+
res = self.get_results(sql, schema=schema, hive_conf=hive_conf)
df = pd.DataFrame(res["data"], columns=[c[0] for c in res["header"]],
**kwargs)
return df
diff --git a/airflow/providers/exasol/hooks/exasol.py
b/airflow/providers/exasol/hooks/exasol.py
index 0911e39604..7a03fdd097 100644
--- a/airflow/providers/exasol/hooks/exasol.py
+++ b/airflow/providers/exasol/hooks/exasol.py
@@ -18,14 +18,16 @@
from __future__ import annotations
from contextlib import closing
-from typing import Any, Callable, Iterable, Mapping, Sequence, TypeVar,
overload
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Mapping, Sequence,
TypeVar, overload
-import pandas as pd
import pyexasol
from pyexasol import ExaConnection, ExaStatement
from airflow.providers.common.sql.hooks.sql import DbApiHook,
return_single_query_results
+if TYPE_CHECKING:
+ import pandas as pd
+
T = TypeVar("T")
diff --git a/airflow/providers/google/cloud/hooks/bigquery.py
b/airflow/providers/google/cloud/hooks/bigquery.py
index 44185963b5..96340c0d50 100644
--- a/airflow/providers/google/cloud/hooks/bigquery.py
+++ b/airflow/providers/google/cloud/hooks/bigquery.py
@@ -28,9 +28,8 @@ import uuid
import warnings
from copy import deepcopy
from datetime import datetime, timedelta
-from typing import Any, Iterable, Mapping, NoReturn, Sequence, Union, cast
+from typing import TYPE_CHECKING, Any, Iterable, Mapping, NoReturn, Sequence,
Union, cast
-import pandas as pd
from aiohttp import ClientSession as ClientSession
from gcloud.aio.bigquery import Job, Table as Table_async
from google.api_core.page_iterator import HTTPIterator
@@ -69,6 +68,9 @@ except ModuleNotFoundError:
from airflow.utils.helpers import convert_camel_to_snake
from airflow.utils.log.logging_mixin import LoggingMixin
+if TYPE_CHECKING:
+ import pandas as pd
+
log = logging.getLogger(__name__)
BigQueryJob = Union[CopyJob, QueryJob, LoadJob, ExtractJob]
diff --git a/airflow/providers/influxdb/hooks/influxdb.py
b/airflow/providers/influxdb/hooks/influxdb.py
index b192450749..290dd37727 100644
--- a/airflow/providers/influxdb/hooks/influxdb.py
+++ b/airflow/providers/influxdb/hooks/influxdb.py
@@ -24,7 +24,8 @@ This module allows to connect to a InfluxDB database.
"""
from __future__ import annotations
-import pandas as pd
+from typing import TYPE_CHECKING
+
from influxdb_client import InfluxDBClient
from influxdb_client.client.flux_table import FluxTable
from influxdb_client.client.write.point import Point
@@ -33,6 +34,9 @@ from influxdb_client.client.write_api import SYNCHRONOUS
from airflow.hooks.base import BaseHook
from airflow.models import Connection
+if TYPE_CHECKING:
+ import pandas as pd
+
class InfluxDBHook(BaseHook):
"""Interact with InfluxDB.
diff --git a/airflow/providers/oracle/hooks/oracle.py
b/airflow/providers/oracle/hooks/oracle.py
index 38f1878964..225a8ca6cc 100644
--- a/airflow/providers/oracle/hooks/oracle.py
+++ b/airflow/providers/oracle/hooks/oracle.py
@@ -24,12 +24,6 @@ from datetime import datetime
import oracledb
from airflow.exceptions import AirflowProviderDeprecationWarning
-
-try:
- import numpy
-except ImportError:
- numpy = None # type: ignore
-
from airflow.providers.common.sql.hooks.sql import DbApiHook
PARAM_TYPES = {bool, float, int, str}
@@ -280,6 +274,11 @@ class OracleHook(DbApiHook):
Set 1 to insert each row in each single transaction
:param replace: Whether to replace instead of insert
"""
+ try:
+ import numpy
+ except ImportError:
+ numpy = None # type: ignore
+
if target_fields:
target_fields = ", ".join(target_fields)
target_fields = f"({target_fields})"
diff --git a/airflow/providers/salesforce/hooks/salesforce.py
b/airflow/providers/salesforce/hooks/salesforce.py
index 336c0db0a0..5b390877f2 100644
--- a/airflow/providers/salesforce/hooks/salesforce.py
+++ b/airflow/providers/salesforce/hooks/salesforce.py
@@ -26,14 +26,16 @@ from __future__ import annotations
import logging
import time
from functools import cached_property
-from typing import Any, Iterable
+from typing import TYPE_CHECKING, Any, Iterable
-import pandas as pd
from requests import Session
from simple_salesforce import Salesforce, api
from airflow.hooks.base import BaseHook
+if TYPE_CHECKING:
+ import pandas as pd
+
log = logging.getLogger(__name__)
@@ -240,6 +242,8 @@ class SalesforceHook(BaseHook):
# between 0 and 10 are turned into timestamps
# if the column cannot be converted,
# just return the original column untouched
+ import pandas as pd
+
try:
column = pd.to_datetime(column)
except ValueError:
@@ -355,6 +359,8 @@ class SalesforceHook(BaseHook):
to the resulting data that marks when the data was fetched from
Salesforce. Default: False
:return: the dataframe.
"""
+ import pandas as pd
+
# this line right here will convert all integers to floats
# if there are any None/np.nan values in the column
# that's because None/np.nan cannot exist in an integer column
diff --git a/airflow/providers/slack/transfers/sql_to_slack.py
b/airflow/providers/slack/transfers/sql_to_slack.py
index ba72e689ee..b04adcab09 100644
--- a/airflow/providers/slack/transfers/sql_to_slack.py
+++ b/airflow/providers/slack/transfers/sql_to_slack.py
@@ -19,7 +19,6 @@ from __future__ import annotations
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Any, Iterable, Mapping, Sequence
-import pandas as pd
from tabulate import tabulate
from airflow.exceptions import AirflowException
@@ -31,6 +30,8 @@ from airflow.providers.slack.hooks.slack_webhook import
SlackWebhookHook
from airflow.providers.slack.utils import parse_filename
if TYPE_CHECKING:
+ import pandas as pd
+
from airflow.utils.context import Context
diff --git a/tests/providers/salesforce/hooks/test_salesforce.py
b/tests/providers/salesforce/hooks/test_salesforce.py
index db0d261ea7..bb5ed82377 100644
--- a/tests/providers/salesforce/hooks/test_salesforce.py
+++ b/tests/providers/salesforce/hooks/test_salesforce.py
@@ -338,7 +338,7 @@ class TestSalesforceHook:
self.salesforce_hook.write_object_to_file(query_results=[],
filename="test", fmt="test")
@patch(
-
"airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+ "pandas.DataFrame.from_records",
return_value=pd.DataFrame({"test": [1, 2, 3], "dict": [nan, nan,
{"foo": "bar"}]}),
)
def test_write_object_to_file_csv(self, mock_data_frame):
@@ -360,7 +360,7 @@ class TestSalesforceHook:
return_value={"fields": [{"name": "field_1", "type": "date"}]},
)
@patch(
-
"airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+ "pandas.DataFrame.from_records",
return_value=pd.DataFrame({"test": [1, 2, 3], "field_1":
["2019-01-01", "2019-01-02", "2019-01-03"]}),
)
def test_write_object_to_file_json_with_timestamp_conversion(self,
mock_data_frame, mock_describe_object):
@@ -383,7 +383,7 @@ class TestSalesforceHook:
@patch("airflow.providers.salesforce.hooks.salesforce.time.time",
return_value=1.23)
@patch(
-
"airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+ "pandas.DataFrame.from_records",
return_value=pd.DataFrame({"test": [1, 2, 3]}),
)
def test_write_object_to_file_ndjson_with_record_time(self,
mock_data_frame, mock_time):
@@ -416,7 +416,7 @@ class TestSalesforceHook:
return_value={"fields": [{"name": "field_1", "type": "date"}]},
)
@patch(
-
"airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+ "pandas.DataFrame.from_records",
return_value=pd.DataFrame({"test": [1, 2, 3], "field_1":
["2019-01-01", "2019-01-02", "2019-01-03"]}),
)
def test_object_to_df_with_timestamp_conversion(self, mock_data_frame,
mock_describe_object):
@@ -434,7 +434,7 @@ class TestSalesforceHook:
@patch("airflow.providers.salesforce.hooks.salesforce.time.time",
return_value=1.23)
@patch(
-
"airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+ "pandas.DataFrame.from_records",
return_value=pd.DataFrame({"test": [1, 2, 3]}),
)
def test_object_to_df_with_record_time(self, mock_data_frame, mock_time):
diff --git a/tests/serialization/serializers/test_serializers.py
b/tests/serialization/serializers/test_serializers.py
index e9805d4d77..66f142e0ac 100644
--- a/tests/serialization/serializers/test_serializers.py
+++ b/tests/serialization/serializers/test_serializers.py
@@ -20,7 +20,6 @@ import datetime
import decimal
import numpy
-import pandas as pd
import pendulum.tz
import pytest
from pendulum import DateTime
@@ -94,6 +93,8 @@ class TestSerializers:
assert i["x"] == d["x"]
def test_pandas(self):
+ import pandas as pd
+
i = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
e = serialize(i)
d = deserialize(e)