This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fdcf975a2d11 [SPARK-48414][PYTHON] Fix breaking change in python's
`fromJson`
fdcf975a2d11 is described below
commit fdcf975a2d11fe4f28219a5e85fc25164a8c8831
Author: Stefan Kandic <[email protected]>
AuthorDate: Wed Jul 24 10:43:38 2024 +0900
[SPARK-48414][PYTHON] Fix breaking change in python's `fromJson`
### What changes were proposed in this pull request?
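Fix the breaking change in the `fromJson` methods of `ArrayType` and `MapType` by giving the `fieldPath` and `collationsMap` parameters default values (`""` and `None`, respectively).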
### Why are the changes needed?
So that existing clients that call `fromJson` with only the JSON argument, and don't care about collations, keep working.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
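Existing unit tests, plus the new `test_array_type_from_json` and `test_map_type_from_json` cases added to `test_types.py`.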
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #46737 from stefankandic/fromJsonBreakingChange.
Authored-by: Stefan Kandic <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/tests/test_types.py | 32 ++++++++++++++++++++++++++++++++
python/pyspark/sql/types.py | 18 +++++++++++-------
2 files changed, 43 insertions(+), 7 deletions(-)
diff --git a/python/pyspark/sql/tests/test_types.py
b/python/pyspark/sql/tests/test_types.py
index 0d0550e42161..dc974fae90fb 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -818,6 +818,38 @@ class TypesTestsMixin:
PySparkTypeError, lambda:
_parse_datatype_json_string(collations_in_nested_map_json)
)
+ def test_array_type_from_json(self):
+ arrayWithoutCollations = ArrayType(StringType(), True)
+ arrayWithCollations = ArrayType(StringType("UNICODE"), True)
+ array_json = {"type": "array", "elementType": "string",
"containsNull": True}
+ collationsMap = {"element": "UNICODE"}
+
+ self.assertEqual(arrayWithoutCollations,
ArrayType.fromJson(array_json))
+ self.assertEqual(
+ arrayWithCollations,
+ ArrayType.fromJson(array_json, fieldPath="",
collationsMap=collationsMap),
+ )
+ self.assertEqual(
+ arrayWithCollations, ArrayType.fromJson(array_json,
collationsMap=collationsMap)
+ )
+
+ def test_map_type_from_json(self):
+ mapWithoutCollations = MapType(StringType(), StringType(), True)
+ mapWithCollations = MapType(StringType("UNICODE"),
StringType("UNICODE"), True)
+ map_json = {
+ "type": "map",
+ "keyType": "string",
+ "valueType": "string",
+ "valueContainsNull": True,
+ }
+ collationsMap = {"key": "UNICODE", "value": "UNICODE"}
+
+ self.assertEqual(mapWithoutCollations, MapType.fromJson(map_json))
+ self.assertEqual(
+ mapWithCollations, MapType.fromJson(map_json, fieldPath="",
collationsMap=collationsMap)
+ )
+ self.assertEqual(mapWithCollations, MapType.fromJson(map_json,
collationsMap=collationsMap))
+
def test_schema_with_bad_collations_provider(self):
from pyspark.sql.types import _parse_datatype_json_string,
_COLLATIONS_METADATA_KEY
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 39763fe8d0a6..67728d4a5cdb 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -762,11 +762,13 @@ class ArrayType(DataType):
def fromJson(
cls,
json: Dict[str, Any],
- fieldPath: str,
- collationsMap: Optional[Dict[str, str]],
+ fieldPath: str = "",
+ collationsMap: Optional[Dict[str, str]] = None,
) -> "ArrayType":
elementType = _parse_datatype_json_value(
- json["elementType"], fieldPath + ".element", collationsMap
+ json["elementType"],
+ "element" if fieldPath == "" else fieldPath + ".element",
+ collationsMap,
)
return ArrayType(elementType, json["containsNull"])
@@ -902,12 +904,14 @@ class MapType(DataType):
def fromJson(
cls,
json: Dict[str, Any],
- fieldPath: str,
- collationsMap: Optional[Dict[str, str]],
+ fieldPath: str = "",
+ collationsMap: Optional[Dict[str, str]] = None,
) -> "MapType":
- keyType = _parse_datatype_json_value(json["keyType"], fieldPath +
".key", collationsMap)
+ keyType = _parse_datatype_json_value(
+ json["keyType"], "key" if fieldPath == "" else fieldPath + ".key",
collationsMap
+ )
valueType = _parse_datatype_json_value(
- json["valueType"], fieldPath + ".value", collationsMap
+ json["valueType"], "value" if fieldPath == "" else fieldPath +
".value", collationsMap
)
return MapType(
keyType,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]