This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push: new d18ad7cc1fbf [SPARK-52832][SQL] Fix JDBC dialect identifier quoting d18ad7cc1fbf is described below commit d18ad7cc1fbf3ed2639457f3f94a94b4d40ec4ce Author: Uros Stankovic <uros.stanko...@databricks.com> AuthorDate: Fri Jul 18 23:00:40 2025 +0800 [SPARK-52832][SQL] Fix JDBC dialect identifier quoting ### What changes were proposed in this pull request? - Quote identifier escapes double quotes in base `JDBCDialect` - Quote identifier in `MySQLDialect` escapes each backtick with another backtick - Quote identifier in `DatabricksDialect` escapes each backtick with another backtick ### Why are the changes needed? Resolves a bug where the JDBC connector needs to quote an identifier that contains double quotes/backticks. ### Does this PR introduce _any_ user-facing change? Fixes identifier quoting in the JDBC dialect when the identifier contains double quotes/backticks. ### How was this patch tested? Using JDBCSuite ### Was this patch authored or co-authored using generative AI tooling? No Closes #51533 from urosstan-db/SPARK-52832-fix-oracle-dialect-identifier-quoting. 
Lead-authored-by: Uros Stankovic <uros.stanko...@databricks.com> Co-authored-by: Uros Stankovic <155642965+urosstan...@users.noreply.github.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 2bc575785465de53376b075b35a23a318d0fcd93) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/jdbc/DatabricksDialect.scala | 7 +++++- .../org/apache/spark/sql/jdbc/JdbcDialects.scala | 4 +++- .../org/apache/spark/sql/jdbc/MySQLDialect.scala | 9 ++++++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 28 +++++++++++++--------- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala index 1aa2282f4a84..4ebe6f0aa374 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala @@ -50,7 +50,12 @@ private case class DatabricksDialect() extends JdbcDialect with NoLegacyJDBCErro } override def quoteIdentifier(colName: String): String = { - s"`$colName`" + // Per Databricks documentation: + // https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-identifiers + // + // "Any character from the Unicode character set. Use ` to escape ` itself." + val escapedColName = colName.replace("`", "``") + s"`$escapedColName`" } override def supportsLimit: Boolean = true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 65e39ca0b91b..b64d72db0b45 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -244,7 +244,9 @@ abstract class JdbcDialect extends Serializable with Logging { * name is a reserved keyword, or in case it contains characters that require quotes (e.g. 
space). */ def quoteIdentifier(colName: String): String = { - s""""$colName"""" + // By ANSI standard, quotes are escaped with another quotes. + val escapedColName = colName.replace("\"", "\"\"") + s""""$escapedColName"""" } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 4323fa4ed99b..f1c795399f93 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -192,7 +192,14 @@ private case class MySQLDialect() extends JdbcDialect with SQLConfHelper with No } override def quoteIdentifier(colName: String): String = { - s"`$colName`" + // Per MySQL documentation: https://dev.mysql.com/doc/refman/8.4/en/identifiers.html + // + // Identifier quote characters can be included within an identifier if you quote the + // identifier. If the character to be included within the identifier is the same as + // that used to quote the identifier itself, then you need to double the character. 
+ // The following statement creates a table named a`b that contains a column named c"d: + val escapedColName = colName.replace("`", "``") + s"`$escapedColName`" } override def schemasExists(conn: Connection, options: JDBCOptions, schema: String): Boolean = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index a1d83ee66508..05fc1a0316c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -795,17 +795,23 @@ class JDBCSuite extends QueryTest with SharedSparkSession { } test("quote column names by jdbc dialect") { - val MySQL = JdbcDialects.get("jdbc:mysql://127.0.0.1/db") - val Postgres = JdbcDialects.get("jdbc:postgresql://127.0.0.1/db") - val Derby = JdbcDialects.get("jdbc:derby:db") - - val columns = Seq("abc", "key") - val MySQLColumns = columns.map(MySQL.quoteIdentifier(_)) - val PostgresColumns = columns.map(Postgres.quoteIdentifier(_)) - val DerbyColumns = columns.map(Derby.quoteIdentifier(_)) - assert(MySQLColumns === Seq("`abc`", "`key`")) - assert(PostgresColumns === Seq(""""abc"""", """"key"""")) - assert(DerbyColumns === Seq(""""abc"""", """"key"""")) + val mySQLDialect = JdbcDialects.get("jdbc:mysql://127.0.0.1/db") + val postgresDialect = JdbcDialects.get("jdbc:postgresql://127.0.0.1/db") + val derbyDialect = JdbcDialects.get("jdbc:derby:db") + val oracleDialect = JdbcDialects.get("jdbc:oracle:thin:@//localhost:1521/orcl") + val databricksDialect = JdbcDialects.get("jdbc:databricks://host/db") + + val columns = Seq("abc", "key", "double_quote\"", "back`") + val mySQLColumns = columns.map(mySQLDialect.quoteIdentifier) + val postgresColumns = columns.map(postgresDialect.quoteIdentifier) + val derbyColumns = columns.map(derbyDialect.quoteIdentifier) + val oracleColumns = columns.map(oracleDialect.quoteIdentifier) + val databricksColumns = 
columns.map(databricksDialect.quoteIdentifier) + assertResult(Seq("`abc`", "`key`", "`double_quote\"`", "`back```"))(mySQLColumns) + assertResult(Seq("\"abc\"", "\"key\"", "\"double_quote\"\"\"", "\"back`\""))(postgresColumns) + assertResult(Seq("\"abc\"", "\"key\"", "\"double_quote\"\"\"", "\"back`\""))(derbyColumns) + assertResult(Seq("\"abc\"", "\"key\"", "\"double_quote\"\"\"", "\"back`\""))(oracleColumns) + assertResult(Seq("`abc`", "`key`", "`double_quote\"`", "`back```"))(databricksColumns) } test("compile filters") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org