This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 067969ff9467 [SPARK-54038][CONNECT] Support getSQLKeywords for
SparkConnectDatabaseMetaData
067969ff9467 is described below
commit 067969ff946712eeabf47040415f25000837cd87
Author: Cheng Pan <[email protected]>
AuthorDate: Thu Oct 30 15:25:53 2025 +0800
[SPARK-54038][CONNECT] Support getSQLKeywords for
SparkConnectDatabaseMetaData
### What changes were proposed in this pull request?
Implement `getSQLKeywords` of the `java.sql.DatabaseMetaData` interface for
`SparkConnectDatabaseMetaData`.
```
/**
* Retrieves a comma-separated list of all of this database's SQL
keywords
* that are NOT also SQL:2003 keywords.
*
* @return the list of this database's keywords that are not also
* SQL:2003 keywords
* @throws SQLException if a database access error occurs
*/
String getSQLKeywords() throws SQLException;
```
Note: I couldn't find an official SQL:2003 keywords list; instead, I referred
to the PostgreSQL 9.1 docs (PostgreSQL 9.1 is the latest version that lists
SQL:2003 keywords in its keyword table):
https://www.postgresql.org/docs/9.1/sql-keywords-appendix.html
### Why are the changes needed?
Improve JDBC API implementation coverage.
### Does this PR introduce _any_ user-facing change?
No, it's a new feature under development.
### How was this patch tested?
New UT is added.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52757 from pan3793/SPARK-54038.
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
.../client/jdbc/SparkConnectDatabaseMetaData.scala | 66 +++++++++++++++++++++-
.../jdbc/SparkConnectDatabaseMetaDataSuite.scala | 9 +++
2 files changed, 73 insertions(+), 2 deletions(-)
diff --git
a/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaData.scala
b/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaData.scala
index 097bd3a0dc6c..a16cba5e3da4 100644
---
a/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaData.scala
+++
b/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaData.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.connect.client.jdbc
import java.sql.{Array => _, _}
import org.apache.spark.SparkBuildInfo.{spark_version => SPARK_VERSION}
+import org.apache.spark.sql.connect.client.jdbc.SparkConnectDatabaseMetaData._
import org.apache.spark.util.VersionUtils
class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends
DatabaseMetaData {
@@ -76,8 +77,11 @@ class SparkConnectDatabaseMetaData(conn:
SparkConnectConnection) extends Databas
override def getIdentifierQuoteString: String = "`"
- override def getSQLKeywords: String =
- throw new SQLFeatureNotSupportedException
+ override def getSQLKeywords: String = {
+ conn.checkOpen()
+ conn.spark.sql("SELECT keyword FROM sql_keywords()").collect()
+ .map(_.getString(0)).diff(SQL_2003_RESERVED_KEYWORDS).mkString(",")
+ }
override def getNumericFunctions: String =
throw new SQLFeatureNotSupportedException
@@ -494,3 +498,61 @@ class SparkConnectDatabaseMetaData(conn:
SparkConnectConnection) extends Databas
override def isWrapperFor(iface: Class[_]): Boolean = iface.isInstance(this)
}
+
+object SparkConnectDatabaseMetaData {
+
+ // SQL:2003 reserved keywords refers to PostgreSQL 9.1 docs:
+ // https://www.postgresql.org/docs/9.1/sql-keywords-appendix.html
+ private[jdbc] val SQL_2003_RESERVED_KEYWORDS = Array(
+ "ABS", "ALL", "ALLOCATE", "ALTER", "AND", "ANY", "ARE", "ARRAY", "AS",
"ASENSITIVE",
+ "ASYMMETRIC", "AT", "ATOMIC", "AUTHORIZATION", "AVG",
+ "BEGIN", "BETWEEN", "BIGINT", "BINARY", "BLOB", "BOOLEAN", "BOTH", "BY",
+ "CALL", "CALLED", "CARDINALITY", "CASCADED", "CASE", "CAST", "CEIL",
"CEILING", "CHAR",
+ "CHARACTER", "CHARACTER_LENGTH", "CHAR_LENGTH", "CHECK", "CLOB", "CLOSE",
"COALESCE",
+ "COLLATE", "COLLECT", "COLUMN", "COMMIT", "CONDITION", "CONNECT",
"CONSTRAINT", "CONVERT",
+ "CORR", "CORRESPONDING", "COUNT", "COVAR_POP", "COVAR_SAMP", "CREATE",
"CROSS", "CUBE",
+ "CUME_DIST", "CURRENT", "CURRENT_DATE", "CURRENT_DEFAULT_TRANSFORM_GROUP",
"CURRENT_PATH",
+ "CURRENT_ROLE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
"CURRENT_TRANSFORM_GROUP_FOR_TYPE",
+ "CURRENT_USER", "CURSOR", "CYCLE",
+ "DATALINK", "DATE", "DAY", "DEALLOCATE", "DEC", "DECIMAL", "DECLARE",
"DEFAULT", "DELETE",
+ "DENSE_RANK", "DEREF", "DESCRIBE", "DETERMINISTIC", "DISCONNECT",
"DISTINCT", "DLNEWCOPY",
+ "DLPREVIOUSCOPY", "DLURLCOMPLETE", "DLURLCOMPLETEONLY",
"DLURLCOMPLETEWRITE", "DLURLPATH",
+ "DLURLPATHONLY", "DLURLPATHWRITE", "DLURLSCHEME", "DLURLSERVER",
"DLVALUE", "DOUBLE",
+ "DROP", "DYNAMIC",
+ "EACH", "ELEMENT", "ELSE", "END", "END-EXEC", "ESCAPE", "EVERY", "EXCEPT",
"EXEC",
+ "EXECUTE", "EXISTS", "EXP", "EXTERNAL", "EXTRACT",
+ "FALSE", "FETCH", "FILTER", "FLOAT", "FLOOR", "FOR", "FOREIGN", "FREE",
"FROM", "FULL",
+ "FUNCTION", "FUSION",
+ "GET", "GLOBAL", "GRANT", "GROUP", "GROUPING",
+ "HAVING", "HOLD", "HOUR",
+ "IDENTITY", "IMPORT", "IN", "INDICATOR", "INNER", "INOUT", "INSENSITIVE",
"INSERT", "INT",
+ "INTEGER", "INTERSECT", "INTERSECTION", "INTERVAL", "INTO", "IS",
+ "JOIN",
+ "LANGUAGE", "LARGE", "LATERAL", "LEADING", "LEFT", "LIKE", "LN", "LOCAL",
"LOCALTIME",
+ "LOCALTIMESTAMP", "LOWER",
+ "MATCH", "MAX", "MEMBER", "MERGE", "METHOD", "MIN", "MINUTE", "MOD",
"MODIFIES", "MODULE",
+ "MONTH", "MULTISET",
+ "NATIONAL", "NATURAL", "NCHAR", "NCLOB", "NEW", "NO", "NONE", "NORMALIZE",
"NOT", "NULL",
+ "NULLIF", "NUMERIC",
+ "OCTET_LENGTH", "OF", "OLD", "ON", "ONLY", "OPEN", "OR", "ORDER", "OUT",
"OUTER", "OVER",
+ "OVERLAPS", "OVERLAY",
+ "PARAMETER", "PARTITION", "PERCENTILE_CONT", "PERCENTILE_DISC",
"PERCENT_RANK", "POSITION",
+ "POWER", "PRECISION", "PREPARE", "PRIMARY", "PROCEDURE",
+ "RANGE", "RANK", "READS", "REAL", "RECURSIVE", "REF", "REFERENCES",
"REFERENCING",
+ "REGR_AVGX", "REGR_AVGY", "REGR_COUNT", "REGR_INTERCEPT", "REGR_R2",
"REGR_SLOPE",
+ "REGR_SXX", "REGR_SXY", "REGR_SYY", "RELEASE", "RESULT", "RETURN",
"RETURNS", "REVOKE",
+ "RIGHT", "ROLLBACK", "ROLLUP", "ROW", "ROWS", "ROW_NUMBER",
+ "SAVEPOINT", "SCOPE", "SCROLL", "SEARCH", "SECOND", "SELECT", "SENSITIVE",
"SESSION_USER",
+ "SET", "SIMILAR", "SMALLINT", "SOME", "SPECIFIC", "SPECIFICTYPE", "SQL",
"SQLEXCEPTION",
+ "SQLSTATE", "SQLWARNING", "SQRT", "START", "STATIC", "STDDEV_POP",
"STDDEV_SAMP",
+ "SUBMULTISET", "SUBSTRING", "SUM", "SYMMETRIC", "SYSTEM", "SYSTEM_USER",
+ "TABLE", "TABLESAMPLE", "THEN", "TIME", "TIMESTAMP", "TIMEZONE_HOUR",
"TIMEZONE_MINUTE",
+ "TO", "TRAILING", "TRANSLATE", "TRANSLATION", "TREAT", "TRIGGER", "TRIM",
"TRUE",
+ "UESCAPE", "UNION", "UNIQUE", "UNKNOWN", "UNNEST", "UPDATE", "UPPER",
"USER", "USING",
+ "VALUE", "VALUES", "VARCHAR", "VARYING", "VAR_POP", "VAR_SAMP",
+ "WHEN", "WHENEVER", "WHERE", "WIDTH_BUCKET", "WINDOW", "WITH", "WITHIN",
"WITHOUT",
+ "XML", "XMLAGG", "XMLATTRIBUTES", "XMLBINARY", "XMLCOMMENT", "XMLCONCAT",
"XMLELEMENT",
+ "XMLFOREST", "XMLNAMESPACES", "XMLPARSE", "XMLPI", "XMLROOT",
"XMLSERIALIZE",
+ "YEAR"
+ )
+}
diff --git
a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala
b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala
index ee212a0c4be5..b2ecc163b2b8 100644
---
a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala
+++
b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala
@@ -190,4 +190,13 @@ class SparkConnectDatabaseMetaDataSuite extends
ConnectFunSuite with RemoteSpark
assert(metadata.supportsSharding === false)
}
}
+
+ test("SparkConnectDatabaseMetaData getSQLKeywords") {
+ withConnection { conn =>
+ val metadata = conn.getMetaData
+ // scalastyle:off line.size.limit
+ assert(metadata.getSQLKeywords ===
"ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CASCADE,CATALOG,CATALOGS,CHANGE,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,E
[...]
+ // scalastyle:on line.size.limit
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]