This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new d54b9b5d32d5 [SPARK-52788][SQL][4.0] Fix error of converting binary value in BinaryType to XML
d54b9b5d32d5 is described below

commit d54b9b5d32d5657c35203fee76a8a17456157043
Author: Kent Yao <[email protected]>
AuthorDate: Tue Jul 22 13:06:11 2025 +0800

    [SPARK-52788][SQL][4.0] Fix error of converting binary value in BinaryType to XML
    
    ### What changes were proposed in this pull request?
    
    Fix errors like 'Failed to convert value [B@2fb2f244 (class of class [B) in type BinaryType to XML'
    
    ### Why are the changes needed?
    Bug fix: to_xml failed on BinaryType values because StaxXmlGenerator had no case for writing binary data.
    
    ### Does this PR introduce _any_ user-facing change? no
    
    ### How was this patch tested?
    new tests
    
    ### Was this patch authored or co-authored using generative AI tooling? no
    
    Closes #51470 from yaooqinn/SPARK-52788.
    
    Authored-by: Kent Yao <yao@apache.org>
    
    Closes #51610 from yaooqinn/SPARK-52788-40.
    
    Authored-by: Kent Yao <[email protected]>
    Signed-off-by: Kent Yao <[email protected]>
---
 .../org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala |  4 ++++
 .../test/resources/sql-tests/analyzer-results/binary.sql.out |  7 +++++++
 .../sql-tests/analyzer-results/binary_base64.sql.out         |  7 +++++++
 .../sql-tests/analyzer-results/binary_basic.sql.out          |  7 +++++++
 .../resources/sql-tests/analyzer-results/binary_hex.sql.out  |  7 +++++++
 .../sql-tests/analyzer-results/binary_hex_discrete.sql.out   |  7 +++++++
 sql/core/src/test/resources/sql-tests/inputs/binary.sql      |  3 ++-
 sql/core/src/test/resources/sql-tests/results/binary.sql.out | 12 ++++++++++++
 .../test/resources/sql-tests/results/binary_base64.sql.out   | 12 ++++++++++++
 .../test/resources/sql-tests/results/binary_basic.sql.out    | 12 ++++++++++++
 .../src/test/resources/sql-tests/results/binary_hex.sql.out  | 12 ++++++++++++
 .../resources/sql-tests/results/binary_hex_discrete.sql.out  | 12 ++++++++++++
 12 files changed, 101 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
index b67882457447..cf2765bfd243 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
@@ -26,6 +26,7 @@ import 
org.apache.hadoop.shaded.com.ctc.wstx.api.WstxOutputProperties
 
 import org.apache.spark.SparkIllegalArgumentException
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.ToStringBase
 import org.apache.spark.sql.catalyst.util.{ArrayData, DateFormatter, 
DateTimeUtils, MapData, TimestampFormatter}
 import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
 import org.apache.spark.sql.types._
@@ -61,6 +62,8 @@ class StaxXmlGenerator(
     legacyFormat = FAST_DATE_FORMAT,
     isParsing = false)
 
+  private val binaryFormatter = ToStringBase.getBinaryFormatter
+
   private val gen = {
     val factory = XMLOutputFactory.newInstance()
     // to_xml disables structure validation to allow multiple root tags
@@ -187,6 +190,7 @@ class StaxXmlGenerator(
     case (DecimalType(), v: Decimal) => gen.writeCharacters(v.toString)
     case (ByteType, v: Byte) => gen.writeCharacters(v.toString)
     case (BooleanType, v: Boolean) => gen.writeCharacters(v.toString)
+    case (BinaryType, v: Array[Byte]) => 
gen.writeCharacters(binaryFormatter(v).toString)
 
     // For the case roundtrip in reading and writing XML files, [[ArrayType]] 
cannot have
     // [[ArrayType]] as element type. It always wraps the element with 
[[StructType]]. So,
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out
index fe61e684a7ff..d21abdc824e0 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out
@@ -32,3 +32,10 @@ SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312
 -- !query analysis
 Project [to_csv(named_struct(n, 1, info, 
0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), 
Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
 +- OneRowRelation
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')))
+-- !query analysis
+Project [to_xml(named_struct(name, cast(Eason as binary), birth, 2018, org, 
cast(Kindergarten Cop as binary)), Some(America/Los_Angeles)) AS 
to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten Cop))#x]
++- OneRowRelation
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out
index fe61e684a7ff..d21abdc824e0 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out
@@ -32,3 +32,10 @@ SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312
 -- !query analysis
 Project [to_csv(named_struct(n, 1, info, 
0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), 
Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
 +- OneRowRelation
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')))
+-- !query analysis
+Project [to_xml(named_struct(name, cast(Eason as binary), birth, 2018, org, 
cast(Kindergarten Cop as binary)), Some(America/Los_Angeles)) AS 
to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten Cop))#x]
++- OneRowRelation
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out
index fe61e684a7ff..d21abdc824e0 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out
@@ -32,3 +32,10 @@ SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312
 -- !query analysis
 Project [to_csv(named_struct(n, 1, info, 
0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), 
Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
 +- OneRowRelation
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')))
+-- !query analysis
+Project [to_xml(named_struct(name, cast(Eason as binary), birth, 2018, org, 
cast(Kindergarten Cop as binary)), Some(America/Los_Angeles)) AS 
to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten Cop))#x]
++- OneRowRelation
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out
index fe61e684a7ff..d21abdc824e0 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out
@@ -32,3 +32,10 @@ SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312
 -- !query analysis
 Project [to_csv(named_struct(n, 1, info, 
0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), 
Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
 +- OneRowRelation
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')))
+-- !query analysis
+Project [to_xml(named_struct(name, cast(Eason as binary), birth, 2018, org, 
cast(Kindergarten Cop as binary)), Some(America/Los_Angeles)) AS 
to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten Cop))#x]
++- OneRowRelation
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out
index fe61e684a7ff..d21abdc824e0 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out
@@ -32,3 +32,10 @@ SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312
 -- !query analysis
 Project [to_csv(named_struct(n, 1, info, 
0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), 
Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
 +- OneRowRelation
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')))
+-- !query analysis
+Project [to_xml(named_struct(name, cast(Eason as binary), birth, 2018, org, 
cast(Kindergarten Cop as binary)), Some(America/Los_Angeles)) AS 
to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten Cop))#x]
++- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/inputs/binary.sql 
b/sql/core/src/test/resources/sql-tests/inputs/binary.sql
index fc875b0afb0e..8da97e466341 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/binary.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/binary.sql
@@ -4,4 +4,5 @@ SELECT X'';
 SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333';
 SELECT CAST('Spark' as BINARY);
 SELECT array( X'', 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as 
BINARY));
-SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'));
\ No newline at end of file
+SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'));
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')));
diff --git a/sql/core/src/test/resources/sql-tests/results/binary.sql.out 
b/sql/core/src/test/resources/sql-tests/results/binary.sql.out
index 050f05271411..9571d9130f73 100644
--- a/sql/core/src/test/resources/sql-tests/results/binary.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/binary.sql.out
@@ -37,3 +37,15 @@ SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312
 struct<to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
 -- !query output
 1,Eason Yao 2018-11-17:13:33:33
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')))
+-- !query schema
+struct<to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten 
Cop)):string>
+-- !query output
+<ROW>
+    <name>Eason</name>
+    <birth>2018</birth>
+    <org>Kindergarten Cop</org>
+</ROW>
diff --git 
a/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out 
b/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out
index 8724e8620b48..ef45d059bc81 100644
--- a/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out
@@ -37,3 +37,15 @@ SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312
 struct<to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
 -- !query output
 1,RWFzb24gWWFvIDIwMTgtMTEtMTc6MTM6MzM6MzM
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')))
+-- !query schema
+struct<to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten 
Cop)):string>
+-- !query output
+<ROW>
+    <name>RWFzb24</name>
+    <birth>2018</birth>
+    <org>S2luZGVyZ2FydGVuIENvcA</org>
+</ROW>
diff --git a/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out 
b/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out
index 0c543a7b4547..0118df765df1 100644
--- a/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out
@@ -37,3 +37,15 @@ SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312
 struct<to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
 -- !query output
 1,"[69, 97, 115, 111, 110, 32, 89, 97, 111, 32, 50, 48, 49, 56, 45, 49, 49, 
45, 49, 55, 58, 49, 51, 58, 51, 51, 58, 51, 51]"
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')))
+-- !query schema
+struct<to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten 
Cop)):string>
+-- !query output
+<ROW>
+    <name>[69, 97, 115, 111, 110]</name>
+    <birth>2018</birth>
+    <org>[75, 105, 110, 100, 101, 114, 103, 97, 114, 116, 101, 110, 32, 67, 
111, 112]</org>
+</ROW>
diff --git a/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out 
b/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out
index d977301f98e0..d97f6efae292 100644
--- a/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out
@@ -37,3 +37,15 @@ SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312
 struct<to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
 -- !query output
 1,4561736F6E2059616F20323031382D31312D31373A31333A33333A3333
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')))
+-- !query schema
+struct<to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten 
Cop)):string>
+-- !query output
+<ROW>
+    <name>4561736F6E</name>
+    <birth>2018</birth>
+    <org>4B696E64657267617274656E20436F70</org>
+</ROW>
diff --git 
a/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out 
b/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out
index 3fc6c0f53cc5..e0dc049db833 100644
--- a/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out
@@ -37,3 +37,15 @@ SELECT to_csv(named_struct('n', 1, 'info', 
X'4561736F6E2059616F20323031382D31312
 struct<to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
 -- !query output
 1,[45 61 73 6F 6E 20 59 61 6F 20 32 30 31 38 2D 31 31 2D 31 37 3A 31 33 3A 33 
33 3A 33 33]
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org', 
binary('Kindergarten Cop')))
+-- !query schema
+struct<to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten 
Cop)):string>
+-- !query output
+<ROW>
+    <name>[45 61 73 6F 6E]</name>
+    <birth>2018</birth>
+    <org>[4B 69 6E 64 65 72 67 61 72 74 65 6E 20 43 6F 70]</org>
+</ROW>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to