This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c229245c0177 [SPARK-52583][SQL] Add a Developer API for stringifying values in UserDefinedType
c229245c0177 is described below
commit c229245c01779b46436b3e236e89f4835bf31096
Author: Kent Yao <[email protected]>
AuthorDate: Thu Jun 26 18:13:21 2025 -0700
[SPARK-52583][SQL] Add a Developer API for stringifying values in UserDefinedType
### What changes were proposed in this pull request?
This PR proposes to add a Developer API for stringifying values in
UserDefinedType. When an instance of the class that a UserDefinedType
represents is cast to a string in the Catalyst layer, this API is called
to produce the string representation. The default implementation simply
calls `obj.toString`, preserving the existing behavior.
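A condensed sketch of the effect on the interpreted cast path (the
actual change is in the ToStringBase diff below; the `before`/`after`
helpers are invented here purely for illustration):

    import org.apache.spark.sql.types.UserDefinedType
    import org.apache.spark.unsafe.types.UTF8String

    // Before this change the interpreted cast used toString directly:
    def before(udt: UserDefinedType[_]): Any => UTF8String =
      o => UTF8String.fromString(udt.deserialize(o).toString)
    // Now it routes through the new hook (which defaults to toString):
    def after(udt: UserDefinedType[_]): Any => UTF8String =
      o => UTF8String.fromString(udt.stringifyValue(udt.deserialize(o)))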
### Why are the changes needed?
The class's `toString` method doesn't always meet users' needs, and in
some circumstances it is difficult to override, for example, when the
class is a JVM built-in or comes from another dependency project.
The `stringifyValue` API gives users and developers an extra hook to
customize the string representation.
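As an illustration, a minimal sketch of such an override. `Point` and
`PointUDT` are hypothetical names invented for this example; only
`stringifyValue` is new in this commit:

    import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
    import org.apache.spark.sql.types._

    // Hypothetical user class, e.g. from a third-party jar whose
    // toString cannot be changed.
    case class Point(x: Double, y: Double)

    class PointUDT extends UserDefinedType[Point] {
      override def sqlType: DataType = ArrayType(DoubleType, containsNull = false)

      override def serialize(p: Point): Any =
        new GenericArrayData(Array(p.x, p.y))

      override def deserialize(datum: Any): Point = datum match {
        case a: ArrayData => Point(a.getDouble(0), a.getDouble(1))
      }

      override def userClass: Class[Point] = classOf[Point]

      // Customize the string produced by casts to string, without
      // touching Point.toString.
      override def stringifyValue(obj: Any): String = {
        val p = obj.asInstanceOf[Point]
        s"POINT(${p.x} ${p.y})"
      }
    }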
### Does this PR introduce _any_ user-facing change?
No, it's a Developer API addition and the default behavior remains unchanged.
### How was this patch tested?
New test cases
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #51289 from yaooqinn/SPARK-52583.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../org/apache/spark/sql/types/UserDefinedType.scala | 16 ++++++++++++++++
.../spark/sql/catalyst/expressions/ToStringBase.scala | 4 ++--
.../org/apache/spark/sql/UserDefinedTypeSuite.scala | 13 +++++++++++++
3 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
index dd8ca26c5246..3d3521d88fdf 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
@@ -94,6 +94,22 @@ abstract class UserDefinedType[UserType >: Null] extends DataType with Serializa
}
override def catalogString: String = sqlType.simpleString
+
+ /**
+ * This method is used to convert the value of a UDT to a string representation.
+ *
+ * By default, it simply calls `toString` on the object.
+ *
+ * @param obj
+ * The object to convert to a string.
+ * @return
+ * A string representation of the object.
+ * @since 4.1.0
+ */
+ @Since("4.1.0")
+ def stringifyValue(obj: Any): String = {
+ obj.toString
+ }
}
private[spark] object UserDefinedType {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
index 2e649763a9ac..8a64ad0a2307 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
@@ -165,7 +165,7 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
})
case pudt: PythonUserDefinedType => castToString(pudt.sqlType)
case udt: UserDefinedType[_] =>
- o => UTF8String.fromString(udt.deserialize(o).toString)
+ o => UTF8String.fromString(udt.stringifyValue(udt.deserialize(o)))
case YearMonthIntervalType(startField, endField) =>
acceptAny[Int](i => UTF8String.fromString(
IntervalUtils.toYearMonthIntervalString(i, ANSI_STYLE, startField, endField)))
@@ -274,7 +274,7 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
case udt: UserDefinedType[_] =>
val udtRef = JavaCode.global(ctx.addReferenceObj("udt", udt), udt.sqlType)
(c, evPrim) =>
- code"$evPrim = UTF8String.fromString($udtRef.deserialize($c).toString());"
+ code"$evPrim = UTF8String.fromString($udtRef.stringifyValue($udtRef.deserialize($c)));"
case i: YearMonthIntervalType =>
val iu = IntervalUtils.getClass.getName.stripSuffix("$")
val iss = IntervalStringStyles.getClass.getName.stripSuffix("$")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala
index 24175ea8ed94..6d8264d3ad56 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala
@@ -245,6 +245,19 @@ class UserDefinedTypeSuite extends QueryTest with SharedSparkSession with Parque
checkEvaluation(ret, "(1.0, 3.0, 5.0, 7.0, 9.0)")
}
+ test("SPARK-52583: Cast UserDefinedType to string with custom
stringifyValue") {
+ val udt = new TestUDT.MyDenseVectorUDT() {
+ override def stringifyValue(obj: Any): String = {
+ val v = obj.asInstanceOf[TestUDT.MyDenseVector]
+ v.toString.stripPrefix("(").stripSuffix(")")
+ }
+ }
+ val vector = new TestUDT.MyDenseVector(Array(1.0, 3.0, 5.0, 7.0, 9.0))
+ val data = udt.serialize(vector)
+ val ret = Cast(Literal(data, udt), StringType, None)
+ checkEvaluation(ret, "1.0, 3.0, 5.0, 7.0, 9.0")
+ }
+
test("SPARK-28497 Can't up cast UserDefinedType to string") {
val udt = new TestUDT.MyDenseVectorUDT()
assert(!Cast.canUpCast(udt, StringType))
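For reference, a hedged sketch of exercising the new hook the same way
the new test above does, reusing the hypothetical PointUDT from the
earlier sketch:

    import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
    import org.apache.spark.sql.types.StringType

    val udt = new PointUDT
    val data = udt.serialize(Point(1.0, 2.0))
    val ret = Cast(Literal(data, udt), StringType, None)
    // With the override above, evaluating `ret` yields "POINT(1.0 2.0)".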