This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new f5b9ea8103dd [SPARK-54552][CONNECT] Fix 
`SparkConnectResultSet.getString` to handle BINARY data type with `UTF_8`
f5b9ea8103dd is described below

commit f5b9ea8103dd1f37c6f0cea0692d7bc5b50b778c
Author: vinodkc <[email protected]>
AuthorDate: Fri Nov 28 18:57:43 2025 -0800

    [SPARK-54552][CONNECT] Fix `SparkConnectResultSet.getString` to handle 
BINARY data type with `UTF_8`
    
    ### What changes were proposed in this pull request?
    
    Fixed `SparkConnectResultSet.getString()` to properly convert BINARY data 
to UTF-8 strings instead of returning byte array object references (e.g., 
"[B@<hashcode>").
    
    ### Why are the changes needed?
    
    The current implementation violates JDBC specification behavior. Users 
calling getString() on BINARY columns expect UTF-8 decoded strings, not Java 
object references.
    
    Before
    ```
    SELECT binary('xDeAdBeEf')
    
    spark-sql: `\xDeAdBeEf`
    beeline with STS: `\xDeAdBeEf`
    beeline with Connect Server: `[B@4d518c66`
    ```
    
    After
    ```
    SELECT binary('xDeAdBeEf')
    
    spark-sql: `\xDeAdBeEf`
    beeline with STS: `\xDeAdBeEf`
    beeline with Connect Server: `\xDeAdBeEf`
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. getString() on BINARY columns now returns UTF-8 decoded strings 
instead of byte array references like "[B@1a2b3c4d".
    
    ### How was this patch tested?
    
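    Added a new test ("get binary type with UTF-8 text") and extended the 
existing binary tests in `SparkConnectJdbcDataTypeSuite` with `getString` 
assertions.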
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #53262 from vinodkc/br_fix_getString_BINARY.
    
    Authored-by: vinodkc <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../client/jdbc/SparkConnectResultSet.scala        |  8 +++++++-
 .../jdbc/SparkConnectJdbcDataTypeSuite.scala       | 24 ++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git 
a/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectResultSet.scala
 
b/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectResultSet.scala
index 0070cbd93c3e..e90f80f783dc 100644
--- 
a/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectResultSet.scala
+++ 
b/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectResultSet.scala
@@ -114,7 +114,13 @@ class SparkConnectResultSet(
   }
 
   override def getString(columnIndex: Int): String = {
-    getColumnValue(columnIndex, null: String) { idx => 
String.valueOf(currentRow.get(idx)) }
+    getColumnValue(columnIndex, null: String) { idx =>
+      currentRow.get(idx) match {
+        case bytes: Array[Byte] =>
+          new String(bytes, java.nio.charset.StandardCharsets.UTF_8)
+        case other => String.valueOf(other)
+      }
+    }
   }
 
   override def getBoolean(columnIndex: Int): Boolean = {
diff --git 
a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectJdbcDataTypeSuite.scala
 
b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectJdbcDataTypeSuite.scala
index eb3afcc1bcf2..9a1db36514da 100644
--- 
a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectJdbcDataTypeSuite.scala
+++ 
b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectJdbcDataTypeSuite.scala
@@ -385,6 +385,11 @@ class SparkConnectJdbcDataTypeSuite extends 
ConnectFunSuite with RemoteSparkSess
       assert(bytes.length === testBytes.length)
       assert(bytes.sameElements(testBytes))
       assert(!rs.wasNull)
+
+      val stringValue = rs.getString(1)
+      val expectedString = new String(testBytes, 
java.nio.charset.StandardCharsets.UTF_8)
+      assert(stringValue === expectedString)
+
       assert(!rs.next())
 
       val metaData = rs.getMetaData
@@ -396,6 +401,22 @@ class SparkConnectJdbcDataTypeSuite extends 
ConnectFunSuite with RemoteSparkSess
     }
   }
 
+  test("get binary type with UTF-8 text") {
+    val textBytes = 
"\\xDeAdBeEf".getBytes(java.nio.charset.StandardCharsets.UTF_8)
+    val hexString = textBytes.map(b => "%02X".format(b)).mkString
+    withExecuteQuery(s"SELECT CAST(X'$hexString' AS BINARY)") { rs =>
+      assert(rs.next())
+      val bytes = rs.getBytes(1)
+      assert(bytes !== null)
+      assert(bytes.sameElements(textBytes))
+
+      val stringValue = rs.getString(1)
+      assert(stringValue === "\\xDeAdBeEf")
+
+      assert(!rs.next())
+    }
+  }
+
   test("get binary type with null") {
     withExecuteQuery("SELECT cast(null as binary)") { rs =>
       assert(rs.next())
@@ -437,6 +458,9 @@ class SparkConnectJdbcDataTypeSuite extends ConnectFunSuite 
with RemoteSparkSess
       assert(bytes !== null)
       assert(bytes.length === 0)
       assert(!rs.wasNull)
+
+      val stringValue = rs.getString(1)
+      assert(stringValue === "")
       assert(!rs.next())
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to