This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push: new d18ad7cc1fbf [SPARK-52832][SQL] Fix JDBC dialect identifier quoting d18ad7cc1fbf is described below commit d18ad7cc1fbf3ed2639457f3f94a94b4d40ec4ce Author: Uros Stankovic <uros.stanko...@databricks.com> AuthorDate: Fri Jul 18 23:00:40 2025 +0800 [SPARK-52832][SQL] Fix JDBC dialect identifier quoting ### What changes were proposed in this pull request? - Quote identifier escapes double quotes in base `JDBCDialect` - Quote identifier in `MySQLDialect` escapes each backtick with another backtick - Quote identifier in `DatabricksDialect` escapes each backtick with another backtick ### Why are the changes needed? Resolves a bug where the JDBC connector needs to quote an identifier that contains double quotes/backticks. ### Does this PR introduce _any_ user-facing change? Fixes identifier quoting in the JDBC dialect when the identifier contains double quotes/backticks. ### How was this patch tested? Using JDBCSuite ### Was this patch authored or co-authored using generative AI tooling? No Closes #51533 from urosstan-db/SPARK-52832-fix-oracle-dialect-identifier-quoting. 
Lead-authored-by: Uros Stankovic <uros.stanko...@databricks.com> Co-authored-by: Uros Stankovic <155642965+urosstan...@users.noreply.github.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 2bc575785465de53376b075b35a23a318d0fcd93) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/jdbc/DatabricksDialect.scala | 7 +++++- .../org/apache/spark/sql/jdbc/JdbcDialects.scala | 4 +++- .../org/apache/spark/sql/jdbc/MySQLDialect.scala | 9 ++++++- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 28 +++++++++++++--------- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala index 1aa2282f4a84..4ebe6f0aa374 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala @@ -50,7 +50,12 @@ private case class DatabricksDialect() extends JdbcDialect with NoLegacyJDBCErro } override def quoteIdentifier(colName: String): String = { - s"`$colName`" + // Per Databricks documentation: + // https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-identifiers + // + // "Any character from the Unicode character set. Use ` to escape ` itself." + val escapedColName = colName.replace("`", "``") + s"`$escapedColName`" } override def supportsLimit: Boolean = true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 65e39ca0b91b..b64d72db0b45 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -244,7 +244,9 @@ abstract class JdbcDialect extends Serializable with Logging { * name is a reserved keyword, or in case it contains characters that require quotes (e.g. 
space). */ def quoteIdentifier(colName: String): String = { - s""""$colName"""" + // By ANSI standard, quotes are escaped with another quotes. + val escapedColName = colName.replace("\"", "\"\"") + s""""$escapedColName"""" } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 4323fa4ed99b..f1c795399f93 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -192,7 +192,14 @@ private case class MySQLDialect() extends JdbcDialect with SQLConfHelper with No } override def quoteIdentifier(colName: String): String = { - s"`$colName`" + // Per MySQL documentation: https://dev.mysql.com/doc/refman/8.4/en/identifiers.html + // + // Identifier quote characters can be included within an identifier if you quote the + // identifier. If the character to be included within the identifier is the same as + // that used to quote the identifier itself, then you need to double the character. 
+ // The following statement creates a table named a`b that contains a column named c"d: + val escapedColName = colName.replace("`", "``") + s"`$escapedColName`" } override def schemasExists(conn: Connection, options: JDBCOptions, schema: String): Boolean = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index a1d83ee66508..05fc1a0316c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -795,17 +795,23 @@ class JDBCSuite extends QueryTest with SharedSparkSession { } test("quote column names by jdbc dialect") { - val MySQL = JdbcDialects.get("jdbc:mysql://127.0.0.1/db") - val Postgres = JdbcDialects.get("jdbc:postgresql://127.0.0.1/db") - val Derby = JdbcDialects.get("jdbc:derby:db") - - val columns = Seq("abc", "key") - val MySQLColumns = columns.map(MySQL.quoteIdentifier(_)) - val PostgresColumns = columns.map(Postgres.quoteIdentifier(_)) - val DerbyColumns = columns.map(Derby.quoteIdentifier(_)) - assert(MySQLColumns === Seq("`abc`", "`key`")) - assert(PostgresColumns === Seq(""""abc"""", """"key"""")) - assert(DerbyColumns === Seq(""""abc"""", """"key"""")) + val mySQLDialect = JdbcDialects.get("jdbc:mysql://127.0.0.1/db") + val postgresDialect = JdbcDialects.get("jdbc:postgresql://127.0.0.1/db") + val derbyDialect = JdbcDialects.get("jdbc:derby:db") + val oracleDialect = JdbcDialects.get("jdbc:oracle:thin:@//localhost:1521/orcl") + val databricksDialect = JdbcDialects.get("jdbc:databricks://host/db") + + val columns = Seq("abc", "key", "double_quote\"", "back`") + val mySQLColumns = columns.map(mySQLDialect.quoteIdentifier) + val postgresColumns = columns.map(postgresDialect.quoteIdentifier) + val derbyColumns = columns.map(derbyDialect.quoteIdentifier) + val oracleColumns = columns.map(oracleDialect.quoteIdentifier) + val databricksColumns = 
columns.map(databricksDialect.quoteIdentifier) + assertResult(Seq("`abc`", "`key`", "`double_quote\"`", "`back```"))(mySQLColumns) + assertResult(Seq("\"abc\"", "\"key\"", "\"double_quote\"\"\"", "\"back`\""))(postgresColumns) + assertResult(Seq("\"abc\"", "\"key\"", "\"double_quote\"\"\"", "\"back`\""))(derbyColumns) + assertResult(Seq("\"abc\"", "\"key\"", "\"double_quote\"\"\"", "\"back`\""))(oracleColumns) + assertResult(Seq("`abc`", "`key`", "`double_quote\"`", "`back```"))(databricksColumns) } test("compile filters") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org