This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new ea8519c055 Avoid importing pandas and numpy in runtime and module level (#33483)
ea8519c055 is described below

commit ea8519c0554d16b13d330a686f8479fc10cc58f2
Author: Hussein Awala <[email protected]>
AuthorDate: Fri Aug 18 07:45:45 2023 +0200

    Avoid importing pandas and numpy in runtime and module level (#33483)
    
    * Avoid importing pandas and numpy in runtime, and import them in the methods which use them instead of the module
    
    * fix salesforce tests
---
 airflow/providers/apache/hive/hooks/hive.py         | 15 +++++++++------
 airflow/providers/exasol/hooks/exasol.py            |  6 ++++--
 airflow/providers/google/cloud/hooks/bigquery.py    |  6 ++++--
 airflow/providers/influxdb/hooks/influxdb.py        |  6 +++++-
 airflow/providers/oracle/hooks/oracle.py            | 11 +++++------
 airflow/providers/salesforce/hooks/salesforce.py    | 10 ++++++++--
 airflow/providers/slack/transfers/sql_to_slack.py   |  3 ++-
 tests/providers/salesforce/hooks/test_salesforce.py | 10 +++++-----
 tests/serialization/serializers/test_serializers.py |  3 ++-
 9 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/airflow/providers/apache/hive/hooks/hive.py b/airflow/providers/apache/hive/hooks/hive.py
index 3282d45aca..579918b470 100644
--- a/airflow/providers/apache/hive/hooks/hive.py
+++ b/airflow/providers/apache/hive/hooks/hive.py
@@ -26,16 +26,12 @@ import time
 import warnings
 from collections import OrderedDict
 from tempfile import NamedTemporaryFile, TemporaryDirectory
-from typing import Any, Iterable, Mapping
+from typing import TYPE_CHECKING, Any, Iterable, Mapping
 
 from airflow.exceptions import AirflowProviderDeprecationWarning
 
-try:
+if TYPE_CHECKING:
     import pandas as pd
-except ImportError as e:
-    from airflow.exceptions import AirflowOptionalProviderFeatureException
-
-    raise AirflowOptionalProviderFeatureException(e)
 
 import csv
 
@@ -1055,6 +1051,13 @@ class HiveServer2Hook(DbApiHook):
 
         :return: pandas.DateFrame
         """
+        try:
+            import pandas as pd
+        except ImportError as e:
+            from airflow.exceptions import AirflowOptionalProviderFeatureException
+
+            raise AirflowOptionalProviderFeatureException(e)
+
         res = self.get_results(sql, schema=schema, hive_conf=hive_conf)
         df = pd.DataFrame(res["data"], columns=[c[0] for c in res["header"]], **kwargs)
         return df
diff --git a/airflow/providers/exasol/hooks/exasol.py b/airflow/providers/exasol/hooks/exasol.py
index 0911e39604..7a03fdd097 100644
--- a/airflow/providers/exasol/hooks/exasol.py
+++ b/airflow/providers/exasol/hooks/exasol.py
@@ -18,14 +18,16 @@
 from __future__ import annotations
 
 from contextlib import closing
-from typing import Any, Callable, Iterable, Mapping, Sequence, TypeVar, overload
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Mapping, Sequence, TypeVar, overload
 
-import pandas as pd
 import pyexasol
 from pyexasol import ExaConnection, ExaStatement
 
 from airflow.providers.common.sql.hooks.sql import DbApiHook, return_single_query_results
 
+if TYPE_CHECKING:
+    import pandas as pd
+
 T = TypeVar("T")
 
 
diff --git a/airflow/providers/google/cloud/hooks/bigquery.py b/airflow/providers/google/cloud/hooks/bigquery.py
index 44185963b5..96340c0d50 100644
--- a/airflow/providers/google/cloud/hooks/bigquery.py
+++ b/airflow/providers/google/cloud/hooks/bigquery.py
@@ -28,9 +28,8 @@ import uuid
 import warnings
 from copy import deepcopy
 from datetime import datetime, timedelta
-from typing import Any, Iterable, Mapping, NoReturn, Sequence, Union, cast
+from typing import TYPE_CHECKING, Any, Iterable, Mapping, NoReturn, Sequence, Union, cast
 
-import pandas as pd
 from aiohttp import ClientSession as ClientSession
 from gcloud.aio.bigquery import Job, Table as Table_async
 from google.api_core.page_iterator import HTTPIterator
@@ -69,6 +68,9 @@ except ModuleNotFoundError:
 from airflow.utils.helpers import convert_camel_to_snake
 from airflow.utils.log.logging_mixin import LoggingMixin
 
+if TYPE_CHECKING:
+    import pandas as pd
+
 log = logging.getLogger(__name__)
 
 BigQueryJob = Union[CopyJob, QueryJob, LoadJob, ExtractJob]
diff --git a/airflow/providers/influxdb/hooks/influxdb.py b/airflow/providers/influxdb/hooks/influxdb.py
index b192450749..290dd37727 100644
--- a/airflow/providers/influxdb/hooks/influxdb.py
+++ b/airflow/providers/influxdb/hooks/influxdb.py
@@ -24,7 +24,8 @@ This module allows to connect to a InfluxDB database.
 """
 from __future__ import annotations
 
-import pandas as pd
+from typing import TYPE_CHECKING
+
 from influxdb_client import InfluxDBClient
 from influxdb_client.client.flux_table import FluxTable
 from influxdb_client.client.write.point import Point
@@ -33,6 +34,9 @@ from influxdb_client.client.write_api import SYNCHRONOUS
 from airflow.hooks.base import BaseHook
 from airflow.models import Connection
 
+if TYPE_CHECKING:
+    import pandas as pd
+
 
 class InfluxDBHook(BaseHook):
     """Interact with InfluxDB.
diff --git a/airflow/providers/oracle/hooks/oracle.py b/airflow/providers/oracle/hooks/oracle.py
index 38f1878964..225a8ca6cc 100644
--- a/airflow/providers/oracle/hooks/oracle.py
+++ b/airflow/providers/oracle/hooks/oracle.py
@@ -24,12 +24,6 @@ from datetime import datetime
 import oracledb
 
 from airflow.exceptions import AirflowProviderDeprecationWarning
-
-try:
-    import numpy
-except ImportError:
-    numpy = None  # type: ignore
-
 from airflow.providers.common.sql.hooks.sql import DbApiHook
 
 PARAM_TYPES = {bool, float, int, str}
@@ -280,6 +274,11 @@ class OracleHook(DbApiHook):
             Set 1 to insert each row in each single transaction
         :param replace: Whether to replace instead of insert
         """
+        try:
+            import numpy
+        except ImportError:
+            numpy = None  # type: ignore
+
         if target_fields:
             target_fields = ", ".join(target_fields)
             target_fields = f"({target_fields})"
diff --git a/airflow/providers/salesforce/hooks/salesforce.py b/airflow/providers/salesforce/hooks/salesforce.py
index 336c0db0a0..5b390877f2 100644
--- a/airflow/providers/salesforce/hooks/salesforce.py
+++ b/airflow/providers/salesforce/hooks/salesforce.py
@@ -26,14 +26,16 @@ from __future__ import annotations
 import logging
 import time
 from functools import cached_property
-from typing import Any, Iterable
+from typing import TYPE_CHECKING, Any, Iterable
 
-import pandas as pd
 from requests import Session
 from simple_salesforce import Salesforce, api
 
 from airflow.hooks.base import BaseHook
 
+if TYPE_CHECKING:
+    import pandas as pd
+
 log = logging.getLogger(__name__)
 
 
@@ -240,6 +242,8 @@ class SalesforceHook(BaseHook):
         # between 0 and 10 are turned into timestamps
         # if the column cannot be converted,
         # just return the original column untouched
+        import pandas as pd
+
         try:
             column = pd.to_datetime(column)
         except ValueError:
@@ -355,6 +359,8 @@ class SalesforceHook(BaseHook):
             to the resulting data that marks when the data was fetched from Salesforce. Default: False
         :return: the dataframe.
         """
+        import pandas as pd
+
         # this line right here will convert all integers to floats
         # if there are any None/np.nan values in the column
         # that's because None/np.nan cannot exist in an integer column
diff --git a/airflow/providers/slack/transfers/sql_to_slack.py b/airflow/providers/slack/transfers/sql_to_slack.py
index ba72e689ee..b04adcab09 100644
--- a/airflow/providers/slack/transfers/sql_to_slack.py
+++ b/airflow/providers/slack/transfers/sql_to_slack.py
@@ -19,7 +19,6 @@ from __future__ import annotations
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Any, Iterable, Mapping, Sequence
 
-import pandas as pd
 from tabulate import tabulate
 
 from airflow.exceptions import AirflowException
@@ -31,6 +30,8 @@ from airflow.providers.slack.hooks.slack_webhook import SlackWebhookHook
 from airflow.providers.slack.utils import parse_filename
 
 if TYPE_CHECKING:
+    import pandas as pd
+
     from airflow.utils.context import Context
 
 
diff --git a/tests/providers/salesforce/hooks/test_salesforce.py b/tests/providers/salesforce/hooks/test_salesforce.py
index db0d261ea7..bb5ed82377 100644
--- a/tests/providers/salesforce/hooks/test_salesforce.py
+++ b/tests/providers/salesforce/hooks/test_salesforce.py
@@ -338,7 +338,7 @@ class TestSalesforceHook:
             self.salesforce_hook.write_object_to_file(query_results=[], filename="test", fmt="test")
 
     @patch(
-        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+        "pandas.DataFrame.from_records",
         return_value=pd.DataFrame({"test": [1, 2, 3], "dict": [nan, nan, {"foo": "bar"}]}),
     )
     def test_write_object_to_file_csv(self, mock_data_frame):
@@ -360,7 +360,7 @@ class TestSalesforceHook:
         return_value={"fields": [{"name": "field_1", "type": "date"}]},
     )
     @patch(
-        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+        "pandas.DataFrame.from_records",
         return_value=pd.DataFrame({"test": [1, 2, 3], "field_1": ["2019-01-01", "2019-01-02", "2019-01-03"]}),
     )
     def test_write_object_to_file_json_with_timestamp_conversion(self, mock_data_frame, mock_describe_object):
@@ -383,7 +383,7 @@ class TestSalesforceHook:
 
     @patch("airflow.providers.salesforce.hooks.salesforce.time.time", return_value=1.23)
     @patch(
-        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+        "pandas.DataFrame.from_records",
         return_value=pd.DataFrame({"test": [1, 2, 3]}),
     )
     def test_write_object_to_file_ndjson_with_record_time(self, mock_data_frame, mock_time):
@@ -416,7 +416,7 @@ class TestSalesforceHook:
         return_value={"fields": [{"name": "field_1", "type": "date"}]},
     )
     @patch(
-        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+        "pandas.DataFrame.from_records",
         return_value=pd.DataFrame({"test": [1, 2, 3], "field_1": ["2019-01-01", "2019-01-02", "2019-01-03"]}),
     )
     def test_object_to_df_with_timestamp_conversion(self, mock_data_frame, mock_describe_object):
@@ -434,7 +434,7 @@ class TestSalesforceHook:
 
     @patch("airflow.providers.salesforce.hooks.salesforce.time.time", return_value=1.23)
     @patch(
-        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+        "pandas.DataFrame.from_records",
         return_value=pd.DataFrame({"test": [1, 2, 3]}),
     )
     def test_object_to_df_with_record_time(self, mock_data_frame, mock_time):
diff --git a/tests/serialization/serializers/test_serializers.py b/tests/serialization/serializers/test_serializers.py
index e9805d4d77..66f142e0ac 100644
--- a/tests/serialization/serializers/test_serializers.py
+++ b/tests/serialization/serializers/test_serializers.py
@@ -20,7 +20,6 @@ import datetime
 import decimal
 
 import numpy
-import pandas as pd
 import pendulum.tz
 import pytest
 from pendulum import DateTime
@@ -94,6 +93,8 @@ class TestSerializers:
         assert i["x"] == d["x"]
 
     def test_pandas(self):
+        import pandas as pd
+
         i = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
         e = serialize(i)
         d = deserialize(e)

Reply via email to