This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new ad83f71d78d2 [SPARK-50893][CONNECT] Mark UDT.DataType optional
ad83f71d78d2 is described below

commit ad83f71d78d25fd1b579b58f4c1605fb1399b22e
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Tue Jan 21 08:48:55 2025 +0900

    [SPARK-50893][CONNECT] Mark UDT.DataType optional
    
    Mark UDT.DataType optional
    
    This field is actually not required for Scala/Java UDT, e.g. the `VectorUDT`.
    
    No
    
    existing protobuf breaking change test
    
    No
    
    Closes #49574 from zhengruifeng/connect_udt_sql_type.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
    (cherry picked from commit efeb1e01ce42e4c626522ab9dbecc0240f9ce507)
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/sql/connect/proto/types_pb2.py              | 10 +++++-----
 python/pyspark/sql/connect/proto/types_pb2.pyi             | 14 +++++++++++++-
 .../common/src/main/protobuf/spark/connect/types.proto     |  6 +++++-
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/python/pyspark/sql/connect/proto/types_pb2.py 
b/python/pyspark/sql/connect/proto/types_pb2.py
index 55f98717a5b0..2e0ef7048466 100644
--- a/python/pyspark/sql/connect/proto/types_pb2.py
+++ b/python/pyspark/sql/connect/proto/types_pb2.py
@@ -35,7 +35,7 @@ _sym_db = _symbol_database.Default()
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    
b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xe7!\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01
 
\x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02
 
\x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03
 
\x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04
 
\x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05
 \x01(\x [...]
+    
b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xf9!\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01
 
\x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02
 
\x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03
 
\x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04
 
\x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05
 \x01(\x [...]
 )
 
 _globals = globals()
@@ -47,7 +47,7 @@ if not _descriptor._USE_C_DESCRIPTORS:
         "DESCRIPTOR"
     ]._serialized_options = 
b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated"
     _globals["_DATATYPE"]._serialized_start = 45
-    _globals["_DATATYPE"]._serialized_end = 4372
+    _globals["_DATATYPE"]._serialized_end = 4390
     _globals["_DATATYPE_BOOLEAN"]._serialized_start = 1595
     _globals["_DATATYPE_BOOLEAN"]._serialized_end = 1662
     _globals["_DATATYPE_BYTE"]._serialized_start = 1664
@@ -97,7 +97,7 @@ if not _descriptor._USE_C_DESCRIPTORS:
     _globals["_DATATYPE_VARIANT"]._serialized_start = 3969
     _globals["_DATATYPE_VARIANT"]._serialized_end = 4036
     _globals["_DATATYPE_UDT"]._serialized_start = 4039
-    _globals["_DATATYPE_UDT"]._serialized_end = 4310
-    _globals["_DATATYPE_UNPARSED"]._serialized_start = 4312
-    _globals["_DATATYPE_UNPARSED"]._serialized_end = 4364
+    _globals["_DATATYPE_UDT"]._serialized_end = 4328
+    _globals["_DATATYPE_UNPARSED"]._serialized_start = 4330
+    _globals["_DATATYPE_UNPARSED"]._serialized_end = 4382
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/types_pb2.pyi 
b/python/pyspark/sql/connect/proto/types_pb2.pyi
index b37621104537..fcf35b8c1f19 100644
--- a/python/pyspark/sql/connect/proto/types_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/types_pb2.pyi
@@ -667,10 +667,14 @@ class DataType(google.protobuf.message.Message):
         SQL_TYPE_FIELD_NUMBER: builtins.int
         type: builtins.str
         jvm_class: builtins.str
+        """Required for Scala/Java UDT"""
         python_class: builtins.str
+        """Required for Python UDT"""
         serialized_python_class: builtins.str
+        """Required for Python UDT"""
         @property
-        def sql_type(self) -> global___DataType: ...
+        def sql_type(self) -> global___DataType:
+            """Required for Python UDT"""
         def __init__(
             self,
             *,
@@ -689,6 +693,8 @@ class DataType(google.protobuf.message.Message):
                 b"_python_class",
                 "_serialized_python_class",
                 b"_serialized_python_class",
+                "_sql_type",
+                b"_sql_type",
                 "jvm_class",
                 b"jvm_class",
                 "python_class",
@@ -708,6 +714,8 @@ class DataType(google.protobuf.message.Message):
                 b"_python_class",
                 "_serialized_python_class",
                 b"_serialized_python_class",
+                "_sql_type",
+                b"_sql_type",
                 "jvm_class",
                 b"jvm_class",
                 "python_class",
@@ -735,6 +743,10 @@ class DataType(google.protobuf.message.Message):
                 "_serialized_python_class", b"_serialized_python_class"
             ],
         ) -> typing_extensions.Literal["serialized_python_class"] | None: ...
+        @typing.overload
+        def WhichOneof(
+            self, oneof_group: typing_extensions.Literal["_sql_type", 
b"_sql_type"]
+        ) -> typing_extensions.Literal["sql_type"] | None: ...
 
     class Unparsed(google.protobuf.message.Message):
         DESCRIPTOR: google.protobuf.descriptor.Descriptor
diff --git a/sql/connect/common/src/main/protobuf/spark/connect/types.proto 
b/sql/connect/common/src/main/protobuf/spark/connect/types.proto
index 4f768f201575..e1a111e5d691 100644
--- a/sql/connect/common/src/main/protobuf/spark/connect/types.proto
+++ b/sql/connect/common/src/main/protobuf/spark/connect/types.proto
@@ -188,10 +188,14 @@ message DataType {
 
   message UDT {
     string type = 1;
+    // Required for Scala/Java UDT
     optional string jvm_class = 2;
+    // Required for Python UDT
     optional string python_class = 3;
+    // Required for Python UDT
     optional string serialized_python_class = 4;
-    DataType sql_type = 5;
+    // Required for Python UDT
+    optional DataType sql_type = 5;
   }
 
   message Unparsed {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to