This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new d54b9b5d32d5 [SPARK-52788][SQL][4.0] Fix error of converting binary
value in BinaryType to XML
d54b9b5d32d5 is described below
commit d54b9b5d32d5657c35203fee76a8a17456157043
Author: Kent Yao <[email protected]>
AuthorDate: Tue Jul 22 13:06:11 2025 +0800
[SPARK-52788][SQL][4.0] Fix error of converting binary value in BinaryType
to XML
### What changes were proposed in this pull request?
Fix errors like 'Failed to convert value [B@2fb2f244 (class of class [B) in
type BinaryType to XML'
### Why are the changes needed?
Bug fix: `to_xml` failed on `BinaryType` values because `StaxXmlGenerator` had no case for `Array[Byte]`.
### Does this PR introduce _any_ user-facing change? no
### How was this patch tested?
new tests
### Was this patch authored or co-authored using generative AI tooling? no
Closes #51470 from yaooqinn/SPARK-52788.
Authored-by: Kent Yao <yao@apache.org>
Closes #51610 from yaooqinn/SPARK-52788-40.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
.../org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala | 4 ++++
.../test/resources/sql-tests/analyzer-results/binary.sql.out | 7 +++++++
.../sql-tests/analyzer-results/binary_base64.sql.out | 7 +++++++
.../sql-tests/analyzer-results/binary_basic.sql.out | 7 +++++++
.../resources/sql-tests/analyzer-results/binary_hex.sql.out | 7 +++++++
.../sql-tests/analyzer-results/binary_hex_discrete.sql.out | 7 +++++++
sql/core/src/test/resources/sql-tests/inputs/binary.sql | 3 ++-
sql/core/src/test/resources/sql-tests/results/binary.sql.out | 12 ++++++++++++
.../test/resources/sql-tests/results/binary_base64.sql.out | 12 ++++++++++++
.../test/resources/sql-tests/results/binary_basic.sql.out | 12 ++++++++++++
.../src/test/resources/sql-tests/results/binary_hex.sql.out | 12 ++++++++++++
.../resources/sql-tests/results/binary_hex_discrete.sql.out | 12 ++++++++++++
12 files changed, 101 insertions(+), 1 deletion(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
index b67882457447..cf2765bfd243 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
@@ -26,6 +26,7 @@ import
org.apache.hadoop.shaded.com.ctc.wstx.api.WstxOutputProperties
import org.apache.spark.SparkIllegalArgumentException
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.ToStringBase
import org.apache.spark.sql.catalyst.util.{ArrayData, DateFormatter,
DateTimeUtils, MapData, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
import org.apache.spark.sql.types._
@@ -61,6 +62,8 @@ class StaxXmlGenerator(
legacyFormat = FAST_DATE_FORMAT,
isParsing = false)
+ private val binaryFormatter = ToStringBase.getBinaryFormatter
+
private val gen = {
val factory = XMLOutputFactory.newInstance()
// to_xml disables structure validation to allow multiple root tags
@@ -187,6 +190,7 @@ class StaxXmlGenerator(
case (DecimalType(), v: Decimal) => gen.writeCharacters(v.toString)
case (ByteType, v: Byte) => gen.writeCharacters(v.toString)
case (BooleanType, v: Boolean) => gen.writeCharacters(v.toString)
+ case (BinaryType, v: Array[Byte]) =>
gen.writeCharacters(binaryFormatter(v).toString)
// For the case roundtrip in reading and writing XML files, [[ArrayType]]
cannot have
// [[ArrayType]] as element type. It always wraps the element with
[[StructType]]. So,
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out
index fe61e684a7ff..d21abdc824e0 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out
@@ -32,3 +32,10 @@ SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312
-- !query analysis
Project [to_csv(named_struct(n, 1, info,
0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333),
Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info,
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
+- OneRowRelation
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')))
+-- !query analysis
+Project [to_xml(named_struct(name, cast(Eason as binary), birth, 2018, org,
cast(Kindergarten Cop as binary)), Some(America/Los_Angeles)) AS
to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten Cop))#x]
++- OneRowRelation
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out
index fe61e684a7ff..d21abdc824e0 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out
@@ -32,3 +32,10 @@ SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312
-- !query analysis
Project [to_csv(named_struct(n, 1, info,
0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333),
Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info,
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
+- OneRowRelation
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')))
+-- !query analysis
+Project [to_xml(named_struct(name, cast(Eason as binary), birth, 2018, org,
cast(Kindergarten Cop as binary)), Some(America/Los_Angeles)) AS
to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten Cop))#x]
++- OneRowRelation
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out
index fe61e684a7ff..d21abdc824e0 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out
@@ -32,3 +32,10 @@ SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312
-- !query analysis
Project [to_csv(named_struct(n, 1, info,
0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333),
Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info,
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
+- OneRowRelation
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')))
+-- !query analysis
+Project [to_xml(named_struct(name, cast(Eason as binary), birth, 2018, org,
cast(Kindergarten Cop as binary)), Some(America/Los_Angeles)) AS
to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten Cop))#x]
++- OneRowRelation
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out
index fe61e684a7ff..d21abdc824e0 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out
@@ -32,3 +32,10 @@ SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312
-- !query analysis
Project [to_csv(named_struct(n, 1, info,
0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333),
Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info,
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
+- OneRowRelation
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')))
+-- !query analysis
+Project [to_xml(named_struct(name, cast(Eason as binary), birth, 2018, org,
cast(Kindergarten Cop as binary)), Some(America/Los_Angeles)) AS
to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten Cop))#x]
++- OneRowRelation
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out
index fe61e684a7ff..d21abdc824e0 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out
@@ -32,3 +32,10 @@ SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312
-- !query analysis
Project [to_csv(named_struct(n, 1, info,
0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333),
Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info,
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x]
+- OneRowRelation
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')))
+-- !query analysis
+Project [to_xml(named_struct(name, cast(Eason as binary), birth, 2018, org,
cast(Kindergarten Cop as binary)), Some(America/Los_Angeles)) AS
to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten Cop))#x]
++- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/inputs/binary.sql
b/sql/core/src/test/resources/sql-tests/inputs/binary.sql
index fc875b0afb0e..8da97e466341 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/binary.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/binary.sql
@@ -4,4 +4,5 @@ SELECT X'';
SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333';
SELECT CAST('Spark' as BINARY);
SELECT array( X'',
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as
BINARY));
-SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'));
\ No newline at end of file
+SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'));
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')));
diff --git a/sql/core/src/test/resources/sql-tests/results/binary.sql.out
b/sql/core/src/test/resources/sql-tests/results/binary.sql.out
index 050f05271411..9571d9130f73 100644
--- a/sql/core/src/test/resources/sql-tests/results/binary.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/binary.sql.out
@@ -37,3 +37,15 @@ SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312
struct<to_csv(named_struct(n, 1, info,
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
-- !query output
1,Eason Yao 2018-11-17:13:33:33
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')))
+-- !query schema
+struct<to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten
Cop)):string>
+-- !query output
+<ROW>
+ <name>Eason</name>
+ <birth>2018</birth>
+ <org>Kindergarten Cop</org>
+</ROW>
diff --git
a/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out
b/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out
index 8724e8620b48..ef45d059bc81 100644
--- a/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out
@@ -37,3 +37,15 @@ SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312
struct<to_csv(named_struct(n, 1, info,
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
-- !query output
1,RWFzb24gWWFvIDIwMTgtMTEtMTc6MTM6MzM6MzM
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')))
+-- !query schema
+struct<to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten
Cop)):string>
+-- !query output
+<ROW>
+ <name>RWFzb24</name>
+ <birth>2018</birth>
+ <org>S2luZGVyZ2FydGVuIENvcA</org>
+</ROW>
diff --git a/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out
b/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out
index 0c543a7b4547..0118df765df1 100644
--- a/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out
@@ -37,3 +37,15 @@ SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312
struct<to_csv(named_struct(n, 1, info,
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
-- !query output
1,"[69, 97, 115, 111, 110, 32, 89, 97, 111, 32, 50, 48, 49, 56, 45, 49, 49,
45, 49, 55, 58, 49, 51, 58, 51, 51, 58, 51, 51]"
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')))
+-- !query schema
+struct<to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten
Cop)):string>
+-- !query output
+<ROW>
+ <name>[69, 97, 115, 111, 110]</name>
+ <birth>2018</birth>
+ <org>[75, 105, 110, 100, 101, 114, 103, 97, 114, 116, 101, 110, 32, 67,
111, 112]</org>
+</ROW>
diff --git a/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out
b/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out
index d977301f98e0..d97f6efae292 100644
--- a/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out
@@ -37,3 +37,15 @@ SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312
struct<to_csv(named_struct(n, 1, info,
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
-- !query output
1,4561736F6E2059616F20323031382D31312D31373A31333A33333A3333
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')))
+-- !query schema
+struct<to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten
Cop)):string>
+-- !query output
+<ROW>
+ <name>4561736F6E</name>
+ <birth>2018</birth>
+ <org>4B696E64657267617274656E20436F70</org>
+</ROW>
diff --git
a/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out
b/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out
index 3fc6c0f53cc5..e0dc049db833 100644
--- a/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out
@@ -37,3 +37,15 @@ SELECT to_csv(named_struct('n', 1, 'info',
X'4561736F6E2059616F20323031382D31312
struct<to_csv(named_struct(n, 1, info,
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')):string>
-- !query output
1,[45 61 73 6F 6E 20 59 61 6F 20 32 30 31 38 2D 31 31 2D 31 37 3A 31 33 3A 33
33 3A 33 33]
+
+
+-- !query
+select to_xml(named_struct('name', binary('Eason'), 'birth', 2018, 'org',
binary('Kindergarten Cop')))
+-- !query schema
+struct<to_xml(named_struct(name, Eason, birth, 2018, org, Kindergarten
Cop)):string>
+-- !query output
+<ROW>
+ <name>[45 61 73 6F 6E]</name>
+ <birth>2018</birth>
+ <org>[4B 69 6E 64 65 72 67 61 72 74 65 6E 20 43 6F 70]</org>
+</ROW>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]