This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 60da3179fa61 [SPARK-51701][PYTHON][TESTS] Move test objects to a separate file
60da3179fa61 is described below
commit 60da3179fa61b3928d2dd15d97fd757e680019e1
Author: Takuya Ueshin <[email protected]>
AuthorDate: Thu Apr 3 11:20:36 2025 +0800
[SPARK-51701][PYTHON][TESTS] Move test objects to a separate file
Moves test objects to a separate file.
Some classes for tests should be placed in a clean separate file to avoid
unnecessary dependencies.
No, test only.
The existing tests should pass.
No.
Closes #50503 from ueshin/issues/SPARK-51701/test_objects.
Authored-by: Takuya Ueshin <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
(cherry picked from commit 295d37fad3b67ac0c73629d5eaebb3baefaeea7e)
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/tests/arrow/test_arrow.py | 3 +-
.../sql/tests/connect/test_connect_creation.py | 2 +-
.../sql/tests/connect/test_connect_readwriter.py | 2 +-
python/pyspark/sql/tests/test_serde.py | 6 +-
python/pyspark/sql/tests/test_types.py | 4 +-
python/pyspark/sql/tests/test_udf.py | 3 +-
python/pyspark/testing/objects.py | 121 +++++++++++++++++++++
python/pyspark/testing/sqlutils.py | 105 +-----------------
.../apache/spark/sql/test/ExamplePointUDT.scala | 2 +-
9 files changed, 131 insertions(+), 117 deletions(-)
diff --git a/python/pyspark/sql/tests/arrow/test_arrow.py
b/python/pyspark/sql/tests/arrow/test_arrow.py
index 065f97fcf7c7..5a770a947889 100644
--- a/python/pyspark/sql/tests/arrow/test_arrow.py
+++ b/python/pyspark/sql/tests/arrow/test_arrow.py
@@ -45,14 +45,13 @@ from pyspark.sql.types import (
NullType,
DayTimeIntervalType,
)
+from pyspark.testing.objects import ExamplePoint, ExamplePointUDT
from pyspark.testing.sqlutils import (
ReusedSQLTestCase,
have_pandas,
have_pyarrow,
pandas_requirement_message,
pyarrow_requirement_message,
- ExamplePoint,
- ExamplePointUDT,
)
from pyspark.errors import ArithmeticException, PySparkTypeError, UnsupportedOperationException
from pyspark.loose_version import LooseVersion
diff --git a/python/pyspark/sql/tests/connect/test_connect_creation.py
b/python/pyspark/sql/tests/connect/test_connect_creation.py
index 5352913f6609..3d67c33a5834 100644
--- a/python/pyspark/sql/tests/connect/test_connect_creation.py
+++ b/python/pyspark/sql/tests/connect/test_connect_creation.py
@@ -32,7 +32,7 @@ from pyspark.sql.types import (
ArrayType,
Row,
)
-from pyspark.testing.sqlutils import MyObject, PythonOnlyUDT
+from pyspark.testing.objects import MyObject, PythonOnlyUDT
from pyspark.testing.connectutils import should_test_connect
from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase
diff --git a/python/pyspark/sql/tests/connect/test_connect_readwriter.py
b/python/pyspark/sql/tests/connect/test_connect_readwriter.py
index 06266b86de3f..dc82d93f9581 100644
--- a/python/pyspark/sql/tests/connect/test_connect_readwriter.py
+++ b/python/pyspark/sql/tests/connect/test_connect_readwriter.py
@@ -30,7 +30,7 @@ from pyspark.sql.types import (
MapType,
Row,
)
-from pyspark.testing.sqlutils import (
+from pyspark.testing.objects import (
PythonOnlyUDT,
ExamplePoint,
PythonOnlyPoint,
diff --git a/python/pyspark/sql/tests/test_serde.py
b/python/pyspark/sql/tests/test_serde.py
index 01cf3c51d7de..eab1ad043ef3 100644
--- a/python/pyspark/sql/tests/test_serde.py
+++ b/python/pyspark/sql/tests/test_serde.py
@@ -23,7 +23,8 @@ import time
from pyspark.sql import Row
from pyspark.sql.functions import lit
from pyspark.sql.types import StructType, StructField, DecimalType, BinaryType
-from pyspark.testing.sqlutils import ReusedSQLTestCase, UTCOffsetTimezone
+from pyspark.testing.objects import UTCOffsetTimezone
+from pyspark.testing.sqlutils import ReusedSQLTestCase
class SerdeTestsMixin:
@@ -82,9 +83,6 @@ class SerdeTestsMixin:
day = datetime.date.today()
now = datetime.datetime.now()
ts = time.mktime(now.timetuple())
- # class in __main__ is not serializable
- from pyspark.testing.sqlutils import UTCOffsetTimezone
-
utc = UTCOffsetTimezone()
utcnow = datetime.datetime.utcfromtimestamp(ts) # without microseconds
# add microseconds to utcnow (keeping year,month,day,hour,minute,second)
diff --git a/python/pyspark/sql/tests/test_types.py
b/python/pyspark/sql/tests/test_types.py
index d920aec6d8d2..15247b97664d 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -71,14 +71,14 @@ from pyspark.sql.types import (
_make_type_verifier,
_merge_type,
)
-from pyspark.testing.sqlutils import (
- ReusedSQLTestCase,
+from pyspark.testing.objects import (
ExamplePointUDT,
PythonOnlyUDT,
ExamplePoint,
PythonOnlyPoint,
MyObject,
)
+from pyspark.testing.sqlutils import ReusedSQLTestCase
from pyspark.testing.utils import PySparkErrorTestUtils
diff --git a/python/pyspark/sql/tests/test_udf.py
b/python/pyspark/sql/tests/test_udf.py
index 067cc0b06a8f..01da0a018367 100644
--- a/python/pyspark/sql/tests/test_udf.py
+++ b/python/pyspark/sql/tests/test_udf.py
@@ -44,9 +44,8 @@ from pyspark.sql.types import (
VariantVal,
)
from pyspark.errors import AnalysisException, PythonException, PySparkTypeError
+from pyspark.testing.objects import ExamplePoint, ExamplePointUDT
from pyspark.testing.sqlutils import (
- ExamplePoint,
- ExamplePointUDT,
ReusedSQLTestCase,
test_compiled,
test_not_compiled_message,
diff --git a/python/pyspark/testing/objects.py
b/python/pyspark/testing/objects.py
new file mode 100644
index 000000000000..5b97664afbdd
--- /dev/null
+++ b/python/pyspark/testing/objects.py
@@ -0,0 +1,121 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import datetime
+
+from pyspark.sql.types import ArrayType, DoubleType, UserDefinedType
+
+
+class UTCOffsetTimezone(datetime.tzinfo):
+ """
+ Specifies timezone in UTC offset
+ """
+
+ def __init__(self, offset=0):
+ self.ZERO = datetime.timedelta(hours=offset)
+
+ def utcoffset(self, dt):
+ return self.ZERO
+
+ def dst(self, dt):
+ return self.ZERO
+
+
+class ExamplePointUDT(UserDefinedType):
+ """
+ User-defined type (UDT) for ExamplePoint.
+ """
+
+ @classmethod
+ def sqlType(cls):
+ return ArrayType(DoubleType(), False)
+
+ @classmethod
+ def module(cls):
+ return "pyspark.sql.tests"
+
+ @classmethod
+ def scalaUDT(cls):
+ return "org.apache.spark.sql.test.ExamplePointUDT"
+
+ def serialize(self, obj):
+ return [obj.x, obj.y]
+
+ def deserialize(self, datum):
+ return ExamplePoint(datum[0], datum[1])
+
+
+class ExamplePoint:
+ """
+ An example class to demonstrate UDT in Scala, Java, and Python.
+ """
+
+ __UDT__ = ExamplePointUDT()
+
+ def __init__(self, x, y):
+ self.x = x
+ self.y = y
+
+ def __repr__(self):
+ return "ExamplePoint(%s,%s)" % (self.x, self.y)
+
+ def __str__(self):
+ return "(%s,%s)" % (self.x, self.y)
+
+ def __eq__(self, other):
+ return isinstance(other, self.__class__) and other.x == self.x and
other.y == self.y
+
+
+class PythonOnlyUDT(UserDefinedType):
+ """
+ User-defined type (UDT) for ExamplePoint.
+ """
+
+ @classmethod
+ def sqlType(cls):
+ return ArrayType(DoubleType(), False)
+
+ @classmethod
+ def module(cls):
+ return "__main__"
+
+ def serialize(self, obj):
+ return [obj.x, obj.y]
+
+ def deserialize(self, datum):
+ return PythonOnlyPoint(datum[0], datum[1])
+
+ @staticmethod
+ def foo():
+ pass
+
+ @property
+ def props(self):
+ return {}
+
+
+class PythonOnlyPoint(ExamplePoint):
+ """
+ An example class to demonstrate UDT in only Python
+ """
+
+ __UDT__ = PythonOnlyUDT() # type: ignore
+
+
+class MyObject:
+ def __init__(self, key, value):
+ self.key = key
+ self.value = value
diff --git a/python/pyspark/testing/sqlutils.py
b/python/pyspark/testing/sqlutils.py
index 4151dfd90459..98d04e7d5b1a 100644
--- a/python/pyspark/testing/sqlutils.py
+++ b/python/pyspark/testing/sqlutils.py
@@ -16,7 +16,6 @@
#
import glob
-import datetime
import math
import os
import shutil
@@ -24,7 +23,7 @@ import tempfile
from contextlib import contextmanager
from pyspark.sql import SparkSession
-from pyspark.sql.types import ArrayType, DoubleType, UserDefinedType, Row
+from pyspark.sql.types import Row
from pyspark.testing.utils import (
ReusedPySparkTestCase,
PySparkErrorTestUtils,
@@ -75,108 +74,6 @@ except Exception as e:
test_compiled = test_not_compiled_message is None
-class UTCOffsetTimezone(datetime.tzinfo):
- """
- Specifies timezone in UTC offset
- """
-
- def __init__(self, offset=0):
- self.ZERO = datetime.timedelta(hours=offset)
-
- def utcoffset(self, dt):
- return self.ZERO
-
- def dst(self, dt):
- return self.ZERO
-
-
-class ExamplePointUDT(UserDefinedType):
- """
- User-defined type (UDT) for ExamplePoint.
- """
-
- @classmethod
- def sqlType(cls):
- return ArrayType(DoubleType(), False)
-
- @classmethod
- def module(cls):
- return "pyspark.sql.tests"
-
- @classmethod
- def scalaUDT(cls):
- return "org.apache.spark.sql.test.ExamplePointUDT"
-
- def serialize(self, obj):
- return [obj.x, obj.y]
-
- def deserialize(self, datum):
- return ExamplePoint(datum[0], datum[1])
-
-
-class ExamplePoint:
- """
- An example class to demonstrate UDT in Scala, Java, and Python.
- """
-
- __UDT__ = ExamplePointUDT()
-
- def __init__(self, x, y):
- self.x = x
- self.y = y
-
- def __repr__(self):
- return "ExamplePoint(%s,%s)" % (self.x, self.y)
-
- def __str__(self):
- return "(%s,%s)" % (self.x, self.y)
-
- def __eq__(self, other):
- return isinstance(other, self.__class__) and other.x == self.x and
other.y == self.y
-
-
-class PythonOnlyUDT(UserDefinedType):
- """
- User-defined type (UDT) for ExamplePoint.
- """
-
- @classmethod
- def sqlType(cls):
- return ArrayType(DoubleType(), False)
-
- @classmethod
- def module(cls):
- return "__main__"
-
- def serialize(self, obj):
- return [obj.x, obj.y]
-
- def deserialize(self, datum):
- return PythonOnlyPoint(datum[0], datum[1])
-
- @staticmethod
- def foo():
- pass
-
- @property
- def props(self):
- return {}
-
-
-class PythonOnlyPoint(ExamplePoint):
- """
- An example class to demonstrate UDT in only Python
- """
-
- __UDT__ = PythonOnlyUDT() # type: ignore
-
-
-class MyObject:
- def __init__(self, key, value):
- self.key = key
- self.value = value
-
-
class SQLTestUtils:
"""
This util assumes the instance of this to have 'spark' attribute, having a spark session.
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala
b/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala
index 7beac16599de..e5e3b17e08d2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala
@@ -45,7 +45,7 @@ private[sql] class ExamplePointUDT extends UserDefinedType[ExamplePoint] {
override def sqlType: DataType = ArrayType(DoubleType, false)
- override def pyUDT: String = "pyspark.testing.sqlutils.ExamplePointUDT"
+ override def pyUDT: String = "pyspark.testing.objects.ExamplePointUDT"
override def serialize(p: ExamplePoint): GenericArrayData = {
val output = new Array[Any](2)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]