This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 68718fe6fda8 [SPARK-49284][CONNECT][SQL] Create a shared Catalog interface
68718fe6fda8 is described below
commit 68718fe6fda8e8abd07296cb9c26fb898e65f57e
Author: Herman van Hovell <[email protected]>
AuthorDate: Fri Sep 6 17:57:13 2024 +0900
[SPARK-49284][CONNECT][SQL] Create a shared Catalog interface
### What changes were proposed in this pull request?
This PR creates a shared Catalog interface. We also move the interfaces to
sql/api.
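To make the shape of the change concrete, here is an abbreviated sketch of the new shared interface, taken from the diff below with the method list heavily trimmed (`Database` and `Row` come from the existing `org.apache.spark.sql` packages):
```scala
package org.apache.spark.sql.api

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalog.Database

// Abbreviated sketch: only a few representative members of the shared Catalog are shown.
// The interface is parameterized by the concrete Dataset type of each implementation.
abstract class Catalog[DS[U] <: Dataset[U, DS]] {
  def currentDatabase: String
  def listDatabases(): DS[Database]
  def createTable(tableName: String, path: String): DS[Row]
  // ... the remaining methods follow the same pattern (see the full diff below)
}
```
Classic (`sql/core`) and Connect keep their existing `org.apache.spark.sql.catalog.Catalog` classes, which now extend `api.Catalog[Dataset]` and inherit the documentation via `@inheritdoc`.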
### Why are the changes needed?
We are creating a shared Scala Spark SQL interface for Classic and Connect.
### Does this PR introduce _any_ user-facing change?
No.
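For illustration (not part of this patch; the table name below is a placeholder), existing call sites compile and behave as before on both Classic and Connect:
```scala
import org.apache.spark.sql.SparkSession

// Illustrative only: user-facing usage is unchanged by this refactoring.
val spark = SparkSession.builder().getOrCreate()
spark.catalog.listTables().show()      // still returns a Dataset[Table]
spark.catalog.tableExists("my_table")  // still returns a Boolean
```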
### How was this patch tested?
Existing tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47991 from hvanhovell/SPARK-49284.
Lead-authored-by: Herman van Hovell <[email protected]>
Co-authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../scala/org/apache/spark/sql/SparkSession.scala | 7 +-
.../org/apache/spark/sql/catalog/Catalog.scala | 737 ++++-----------------
project/MimaExcludes.scala | 7 +
.../scala/org/apache/spark/sql/api}/Catalog.scala | 170 ++---
.../org/apache/spark/sql/api/SparkSession.scala | 8 +
.../org/apache/spark/sql/catalog/interface.scala | 22 +-
.../scala/org/apache/spark/sql/SparkSession.scala | 7 +-
.../org/apache/spark/sql/catalog/Catalog.scala | 685 +++----------------
.../org/apache/spark/sql/catalog/interface.scala | 224 -------
9 files changed, 321 insertions(+), 1546 deletions(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 93be185e0ffd..209ec88618c4 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -230,12 +230,7 @@ class SparkSession private[sql] (
lazy val streams: StreamingQueryManager = new StreamingQueryManager(this)
- /**
- * Interface through which the user may create, drop, alter or query underlying databases,
- * tables, functions etc.
- *
- * @since 3.5.0
- */
+ /** @inheritdoc */
lazy val catalog: Catalog = new CatalogImpl(this)
/** @inheritdoc */
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
index cf0fef147ee8..11a4a044d20e 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
@@ -17,660 +17,151 @@
package org.apache.spark.sql.catalog
-import scala.jdk.CollectionConverters._
+import java.util
-import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset}
+import org.apache.spark.sql.{api, DataFrame, Dataset}
import org.apache.spark.sql.types.StructType
-import org.apache.spark.storage.StorageLevel
-/**
- * Catalog interface for Spark. To access this, use `SparkSession.catalog`.
- *
- * @since 3.5.0
- */
-abstract class Catalog {
-
- /**
- * Returns the current database (namespace) in this session.
- *
- * @since 3.5.0
- */
- def currentDatabase: String
-
- /**
- * Sets the current database (namespace) in this session.
- *
- * @since 3.5.0
- */
- def setCurrentDatabase(dbName: String): Unit
-
- /**
- * Returns a list of databases (namespaces) available within the current catalog.
- *
- * @since 3.5.0
- */
- def listDatabases(): Dataset[Database]
-
- /**
- * Returns a list of databases (namespaces) which name match the specify pattern and available
- * within the current catalog.
- *
- * @since 3.5.0
- */
- def listDatabases(pattern: String): Dataset[Database]
-
- /**
- * Returns a list of tables/views in the current database (namespace). This includes all
- * temporary views.
- *
- * @since 3.5.0
- */
- def listTables(): Dataset[Table]
-
- /**
- * Returns a list of tables/views in the specified database (namespace) (the name can be
- * qualified with catalog). This includes all temporary views.
- *
- * @since 3.5.0
- */
- @throws[AnalysisException]("database does not exist")
- def listTables(dbName: String): Dataset[Table]
-
- /**
- * Returns a list of tables/views in the specified database (namespace) which name match the
- * specify pattern (the name can be qualified with catalog). This includes all temporary views.
- *
- * @since 3.5.0
- */
- @throws[AnalysisException]("database does not exist")
- def listTables(dbName: String, pattern: String): Dataset[Table]
-
- /**
- * Returns a list of functions registered in the current database (namespace). This includes all
- * temporary functions.
- *
- * @since 3.5.0
- */
- def listFunctions(): Dataset[Function]
-
- /**
- * Returns a list of functions registered in the specified database (namespace) (the name can be
- * qualified with catalog). This includes all built-in and temporary functions.
- *
- * @since 3.5.0
- */
- @throws[AnalysisException]("database does not exist")
- def listFunctions(dbName: String): Dataset[Function]
-
- /**
- * Returns a list of functions registered in the specified database (namespace) which name match
- * the specify pattern (the name can be qualified with catalog). This includes all built-in and
- * temporary functions.
- *
- * @since 3.5.0
- */
- @throws[AnalysisException]("database does not exist")
- def listFunctions(dbName: String, pattern: String): Dataset[Function]
-
- /**
- * Returns a list of columns for the given table/view or temporary view.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table/view. It follows the same
- * resolution rule with SQL: search for temp views first then table/views in the current
- * database (namespace).
- * @since 3.5.0
- */
- @throws[AnalysisException]("table does not exist")
- def listColumns(tableName: String): Dataset[Column]
-
- /**
- * Returns a list of columns for the given table/view in the specified
database under the Hive
- * Metastore.
- *
- * To list columns for table/view in other catalogs, please use
`listColumns(tableName)` with
- * qualified table/view name instead.
- *
- * @param dbName
- * is an unqualified name that designates a database.
- * @param tableName
- * is an unqualified name that designates a table/view.
- * @since 3.5.0
- */
- @throws[AnalysisException]("database or table does not exist")
- def listColumns(dbName: String, tableName: String): Dataset[Column]
-
- /**
- * Get the database (namespace) with the specified name (can be qualified
with catalog). This
- * throws an AnalysisException when the database (namespace) cannot be found.
- *
- * @since 3.5.0
- */
- @throws[AnalysisException]("database does not exist")
- def getDatabase(dbName: String): Database
-
- /**
- * Get the table or view with the specified name. This table can be a
temporary view or a
- * table/view. This throws an AnalysisException when no Table can be found.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table/view.
It follows the same
- * resolution rule with SQL: search for temp views first then table/views
in the current
- * database (namespace).
- * @since 3.5.0
- */
- @throws[AnalysisException]("table does not exist")
- def getTable(tableName: String): Table
-
- /**
- * Get the table or view with the specified name in the specified database
under the Hive
- * Metastore. This throws an AnalysisException when no Table can be found.
- *
- * To get table/view in other catalogs, please use `getTable(tableName)`
with qualified
- * table/view name instead.
- *
- * @since 3.5.0
- */
- @throws[AnalysisException]("database or table does not exist")
- def getTable(dbName: String, tableName: String): Table
-
- /**
- * Get the function with the specified name. This function can be a
temporary function or a
- * function. This throws an AnalysisException when the function cannot be
found.
- *
- * @param functionName
- * is either a qualified or unqualified name that designates a function.
It follows the same
- * resolution rule with SQL: search for built-in/temp functions first then
functions in the
- * current database (namespace).
- * @since 3.5.0
- */
- @throws[AnalysisException]("function does not exist")
- def getFunction(functionName: String): Function
-
- /**
- * Get the function with the specified name in the specified database under
the Hive Metastore.
- * This throws an AnalysisException when the function cannot be found.
- *
- * To get functions in other catalogs, please use
`getFunction(functionName)` with qualified
- * function name instead.
- *
- * @param dbName
- * is an unqualified name that designates a database.
- * @param functionName
- * is an unqualified name that designates a function in the specified
database
- * @since 3.5.0
- */
- @throws[AnalysisException]("database or function does not exist")
- def getFunction(dbName: String, functionName: String): Function
-
- /**
- * Check if the database (namespace) with the specified name exists (the
name can be qualified
- * with catalog).
- *
- * @since 3.5.0
- */
- def databaseExists(dbName: String): Boolean
-
- /**
- * Check if the table or view with the specified name exists. This can
either be a temporary
- * view or a table/view.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table/view.
It follows the same
- * resolution rule with SQL: search for temp views first then table/views
in the current
- * database (namespace).
- * @since 3.5.0
- */
- def tableExists(tableName: String): Boolean
-
- /**
- * Check if the table or view with the specified name exists in the
specified database under the
- * Hive Metastore.
- *
- * To check existence of table/view in other catalogs, please use
`tableExists(tableName)` with
- * qualified table/view name instead.
- *
- * @param dbName
- * is an unqualified name that designates a database.
- * @param tableName
- * is an unqualified name that designates a table.
- * @since 3.5.0
- */
- def tableExists(dbName: String, tableName: String): Boolean
-
- /**
- * Check if the function with the specified name exists. This can either be
a temporary function
- * or a function.
- *
- * @param functionName
- * is either a qualified or unqualified name that designates a function.
It follows the same
- * resolution rule with SQL: search for built-in/temp functions first then
functions in the
- * current database (namespace).
- * @since 3.5.0
- */
- def functionExists(functionName: String): Boolean
-
- /**
- * Check if the function with the specified name exists in the specified
database under the Hive
- * Metastore.
- *
- * To check existence of functions in other catalogs, please use
`functionExists(functionName)`
- * with qualified function name instead.
- *
- * @param dbName
- * is an unqualified name that designates a database.
- * @param functionName
- * is an unqualified name that designates a function.
- * @since 3.5.0
- */
- def functionExists(dbName: String, functionName: String): Boolean
-
- /**
- * Creates a table from the given path and returns the corresponding
DataFrame. It will use the
- * default data source configured by spark.sql.sources.default.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(tableName: String, path: String): DataFrame = {
- createTable(tableName, path)
- }
-
- /**
- * Creates a table from the given path and returns the corresponding
DataFrame. It will use the
- * default data source configured by spark.sql.sources.default.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def createTable(tableName: String, path: String): DataFrame
-
- /**
- * Creates a table from the given path based on a data source and returns
the corresponding
- * DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(tableName: String, path: String, source: String):
DataFrame = {
- createTable(tableName, path, source)
- }
-
- /**
- * Creates a table from the given path based on a data source and returns
the corresponding
- * DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def createTable(tableName: String, path: String, source: String): DataFrame
-
- /**
- * Creates a table from the given path based on a data source and a set of
options. Then,
- * returns the corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(
+/** @inheritdoc */
+abstract class Catalog extends api.Catalog[Dataset] {
+
+ /** @inheritdoc */
+ override def listDatabases(): Dataset[Database]
+
+ /** @inheritdoc */
+ override def listDatabases(pattern: String): Dataset[Database]
+
+ /** @inheritdoc */
+ override def listTables(): Dataset[Table]
+
+ /** @inheritdoc */
+ override def listTables(dbName: String): Dataset[Table]
+
+ /** @inheritdoc */
+ override def listTables(dbName: String, pattern: String): Dataset[Table]
+
+ /** @inheritdoc */
+ override def listFunctions(): Dataset[Function]
+
+ /** @inheritdoc */
+ override def listFunctions(dbName: String): Dataset[Function]
+
+ /** @inheritdoc */
+ override def listFunctions(dbName: String, pattern: String): Dataset[Function]
+
+ /** @inheritdoc */
+ override def listColumns(tableName: String): Dataset[Column]
+
+ /** @inheritdoc */
+ override def listColumns(dbName: String, tableName: String): Dataset[Column]
+
+ /** @inheritdoc */
+ override def createTable(tableName: String, path: String): DataFrame
+
+ /** @inheritdoc */
+ override def createTable(tableName: String, path: String, source: String): DataFrame
+
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(tableName, source, options)
- }
-
- /**
- * Creates a table based on the dataset in a data source and a set of
options. Then, returns the
- * corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def createTable(
+ options: Map[String, String]): DataFrame
+
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(tableName, source, options.asScala.toMap)
- }
-
- /**
- * (Scala-specific) Creates a table from the given path based on a data
source and a set of
- * options. Then, returns the corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(
+ description: String,
+ options: Map[String, String]): DataFrame
+
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
- options: Map[String, String]): DataFrame = {
- createTable(tableName, source, options)
- }
-
- /**
- * (Scala-specific) Creates a table based on the dataset in a data source
and a set of options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def createTable(tableName: String, source: String, options: Map[String, String]): DataFrame
-
- /**
- * Create a table from the given path based on a data source, a schema and a
set of options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(
+ schema: StructType,
+ options: Map[String, String]): DataFrame
+
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
schema: StructType,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(tableName, source, schema, options)
- }
-
- /**
- * Creates a table based on the dataset in a data source and a set of
options. Then, returns the
- * corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def createTable(
+ description: String,
+ options: Map[String, String]): DataFrame
+
+ /** @inheritdoc */
+ override def listCatalogs(): Dataset[CatalogMetadata]
+
+ /** @inheritdoc */
+ override def listCatalogs(pattern: String): Dataset[CatalogMetadata]
+
+ /** @inheritdoc */
+ override def createExternalTable(tableName: String, path: String): DataFrame =
+ super.createExternalTable(tableName, path)
+
+ /** @inheritdoc */
+ override def createExternalTable(tableName: String, path: String, source: String): DataFrame =
+ super.createExternalTable(tableName, path, source)
+
+ /** @inheritdoc */
+ override def createExternalTable(
tableName: String,
source: String,
- description: String,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(
- tableName,
- source = source,
- description = description,
- options = options.asScala.toMap)
- }
-
- /**
- * (Scala-specific) Creates a table based on the dataset in a data source
and a set of options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def createTable(
+ options: util.Map[String, String]): DataFrame =
+ super.createExternalTable(tableName, source, options)
+
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
- description: String,
- options: Map[String, String]): DataFrame
+ options: util.Map[String, String]): DataFrame =
+ super.createTable(tableName, source, options)
- /**
- * Create a table based on the dataset in a data source, a schema and a set
of options. Then,
- * returns the corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def createTable(
+ /** @inheritdoc */
+ override def createExternalTable(
tableName: String,
source: String,
- schema: StructType,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(tableName, source, schema, options.asScala.toMap)
- }
-
- /**
- * (Scala-specific) Create a table from the given path based on a data
source, a schema and a
- * set of options. Then, returns the corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(
+ options: Map[String, String]): DataFrame =
+ super.createExternalTable(tableName, source, options)
+
+ /** @inheritdoc */
+ override def createExternalTable(
tableName: String,
source: String,
schema: StructType,
- options: Map[String, String]): DataFrame = {
- createTable(tableName, source, schema, options)
- }
-
- /**
- * (Scala-specific) Create a table based on the dataset in a data source, a
schema and a set of
- * options. Then, returns the corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def createTable(
+ options: util.Map[String, String]): DataFrame =
+ super.createExternalTable(tableName, source, schema, options)
+
+ /** @inheritdoc */
+ override def createTable(
+ tableName: String,
+ source: String,
+ description: String,
+ options: util.Map[String, String]): DataFrame =
+ super.createTable(tableName, source, description, options)
+
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
schema: StructType,
- options: Map[String, String]): DataFrame
+ options: util.Map[String, String]): DataFrame =
+ super.createTable(tableName, source, schema, options)
- /**
- * Create a table based on the dataset in a data source, a schema and a set
of options. Then,
- * returns the corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def createTable(
+ /** @inheritdoc */
+ override def createExternalTable(
tableName: String,
source: String,
schema: StructType,
- description: String,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(
- tableName,
- source = source,
- schema = schema,
- description = description,
- options = options.asScala.toMap)
- }
-
- /**
- * (Scala-specific) Create a table based on the dataset in a data source, a
schema and a set of
- * options. Then, returns the corresponding DataFrame.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def createTable(
+ options: Map[String, String]): DataFrame =
+ super.createExternalTable(tableName, source, schema, options)
+
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
schema: StructType,
description: String,
- options: Map[String, String]): DataFrame
-
- /**
- * Drops the local temporary view with the given view name in the catalog.
If the view has been
- * cached before, then it will also be uncached.
- *
- * Local temporary view is session-scoped. Its lifetime is the lifetime of
the session that
- * created it, i.e. it will be automatically dropped when the session
terminates. It's not tied
- * to any databases, i.e. we can't use `db1.view1` to reference a local
temporary view.
- *
- * Note that, the return type of this method was Unit in Spark 2.0, but
changed to Boolean in
- * Spark 2.1.
- *
- * @param viewName
- * the name of the temporary view to be dropped.
- * @return
- * true if the view is dropped successfully, false otherwise.
- * @since 3.5.0
- */
- def dropTempView(viewName: String): Boolean
-
- /**
- * Drops the global temporary view with the given view name in the catalog.
If the view has been
- * cached before, then it will also be uncached.
- *
- * Global temporary view is cross-session. Its lifetime is the lifetime of
the Spark
- * application, i.e. it will be automatically dropped when the application
terminates. It's tied
- * to a system preserved database `global_temp`, and we must use the
qualified name to refer a
- * global temp view, e.g. `SELECT * FROM global_temp.view1`.
- *
- * @param viewName
- * the unqualified name of the temporary view to be dropped.
- * @return
- * true if the view is dropped successfully, false otherwise.
- * @since 3.5.0
- */
- def dropGlobalTempView(viewName: String): Boolean
-
- /**
- * Recovers all the partitions in the directory of a table and update the
catalog. Only works
- * with a partitioned table, and not a view.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table. If
no database
- * identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
- */
- def recoverPartitions(tableName: String): Unit
-
- /**
- * Returns true if the table is currently cached in-memory.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table/view.
If no database
- * identifier is provided, it refers to a temporary view or a table/view
in the current
- * database.
- * @since 3.5.0
- */
- def isCached(tableName: String): Boolean
-
- /**
- * Caches the specified table in-memory.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table/view.
If no database
- * identifier is provided, it refers to a temporary view or a table/view
in the current
- * database.
- * @since 3.5.0
- */
- def cacheTable(tableName: String): Unit
-
- /**
- * Caches the specified table with the given storage level.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table/view.
If no database
- * identifier is provided, it refers to a temporary view or a table/view
in the current
- * database.
- * @param storageLevel
- * storage level to cache table.
- * @since 3.5.0
- */
- def cacheTable(tableName: String, storageLevel: StorageLevel): Unit
-
- /**
- * Removes the specified table from the in-memory cache.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table/view.
If no database
- * identifier is provided, it refers to a temporary view or a table/view
in the current
- * database.
- * @since 3.5.0
- */
- def uncacheTable(tableName: String): Unit
-
- /**
- * Removes all cached tables from the in-memory cache.
- *
- * @since 3.5.0
- */
- def clearCache(): Unit
-
- /**
- * Invalidates and refreshes all the cached data and metadata of the given
table. For
- * performance reasons, Spark SQL or the external data source library it
uses might cache
- * certain metadata about a table, such as the location of blocks. When
those change outside of
- * Spark SQL, users should call this function to invalidate the cache.
- *
- * If this table is cached as an InMemoryRelation, drop the original cached
version and make the
- * new version cached lazily.
- *
- * @param tableName
- * is either a qualified or unqualified name that designates a table/view.
If no database
- * identifier is provided, it refers to a temporary view or a table/view
in the current
- * database.
- * @since 3.5.0
- */
- def refreshTable(tableName: String): Unit
-
- /**
- * Invalidates and refreshes all the cached data (and the associated
metadata) for any `Dataset`
- * that contains the given data source path. Path matching is by prefix,
i.e. "/" would
- * invalidate everything that is cached.
- *
- * @since 3.5.0
- */
- def refreshByPath(path: String): Unit
-
- /**
- * Returns the current catalog in this session.
- *
- * @since 3.5.0
- */
- def currentCatalog(): String
-
- /**
- * Sets the current catalog in this session.
- *
- * @since 3.5.0
- */
- def setCurrentCatalog(catalogName: String): Unit
-
- /**
- * Returns a list of catalogs available in this session.
- *
- * @since 3.5.0
- */
- def listCatalogs(): Dataset[CatalogMetadata]
-
- /**
- * Returns a list of catalogs which name match the specify pattern and
available in this
- * session.
- *
- * @since 3.5.0
- */
- def listCatalogs(pattern: String): Dataset[CatalogMetadata]
+ options: util.Map[String, String]): DataFrame =
+ super.createTable(tableName, source, schema, description, options)
}
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 7abf31f4aca3..21c6124116f0 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -148,6 +148,13 @@ object MimaExcludes {
// SPARK-49425: Create a shared DataFrameWriter interface.
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.DataFrameWriter"),
+ // SPARK-49284: Shared Catalog interface.
+ ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.catalog.CatalogMetadata"),
+ ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.catalog.Column"),
+ ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.catalog.Database"),
+ ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.catalog.Function"),
+ ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.catalog.Table"),
+
// SPARK-49426: Shared DataFrameWriterV2
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CreateTableWriter"),
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.DataFrameWriterV2"),
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/api/src/main/scala/org/apache/spark/sql/api/Catalog.scala
similarity index 88%
copy from connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
copy to sql/api/src/main/scala/org/apache/spark/sql/api/Catalog.scala
index cf0fef147ee8..fbb665b7f1b1 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/api/Catalog.scala
@@ -15,41 +15,46 @@
* limitations under the License.
*/
-package org.apache.spark.sql.catalog
+package org.apache.spark.sql.api
import scala.jdk.CollectionConverters._
-import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset}
+import _root_.java.util
+
+import org.apache.spark.annotation.Stable
+import org.apache.spark.sql.{AnalysisException, Row}
+import org.apache.spark.sql.catalog.{CatalogMetadata, Column, Database, Function, Table}
import org.apache.spark.sql.types.StructType
import org.apache.spark.storage.StorageLevel
/**
* Catalog interface for Spark. To access this, use `SparkSession.catalog`.
*
- * @since 3.5.0
+ * @since 2.0.0
*/
-abstract class Catalog {
+@Stable
+abstract class Catalog[DS[U] <: Dataset[U, DS]] {
/**
* Returns the current database (namespace) in this session.
*
- * @since 3.5.0
+ * @since 2.0.0
*/
def currentDatabase: String
/**
* Sets the current database (namespace) in this session.
*
- * @since 3.5.0
+ * @since 2.0.0
*/
def setCurrentDatabase(dbName: String): Unit
/**
* Returns a list of databases (namespaces) available within the current
catalog.
*
- * @since 3.5.0
+ * @since 2.0.0
*/
- def listDatabases(): Dataset[Database]
+ def listDatabases(): DS[Database]
/**
* Returns a list of databases (namespaces) which name match the specify
pattern and available
@@ -57,24 +62,24 @@ abstract class Catalog {
*
* @since 3.5.0
*/
- def listDatabases(pattern: String): Dataset[Database]
+ def listDatabases(pattern: String): DS[Database]
/**
* Returns a list of tables/views in the current database (namespace). This
includes all
* temporary views.
*
- * @since 3.5.0
+ * @since 2.0.0
*/
- def listTables(): Dataset[Table]
+ def listTables(): DS[Table]
/**
* Returns a list of tables/views in the specified database (namespace) (the
name can be
* qualified with catalog). This includes all temporary views.
*
- * @since 3.5.0
+ * @since 2.0.0
*/
@throws[AnalysisException]("database does not exist")
- def listTables(dbName: String): Dataset[Table]
+ def listTables(dbName: String): DS[Table]
/**
* Returns a list of tables/views in the specified database (namespace)
which name match the
@@ -83,24 +88,24 @@ abstract class Catalog {
* @since 3.5.0
*/
@throws[AnalysisException]("database does not exist")
- def listTables(dbName: String, pattern: String): Dataset[Table]
+ def listTables(dbName: String, pattern: String): DS[Table]
/**
* Returns a list of functions registered in the current database
(namespace). This includes all
* temporary functions.
*
- * @since 3.5.0
+ * @since 2.0.0
*/
- def listFunctions(): Dataset[Function]
+ def listFunctions(): DS[Function]
/**
* Returns a list of functions registered in the specified database
(namespace) (the name can be
* qualified with catalog). This includes all built-in and temporary
functions.
*
- * @since 3.5.0
+ * @since 2.0.0
*/
@throws[AnalysisException]("database does not exist")
- def listFunctions(dbName: String): Dataset[Function]
+ def listFunctions(dbName: String): DS[Function]
/**
* Returns a list of functions registered in the specified database
(namespace) which name match
@@ -110,7 +115,7 @@ abstract class Catalog {
* @since 3.5.0
*/
@throws[AnalysisException]("database does not exist")
- def listFunctions(dbName: String, pattern: String): Dataset[Function]
+ def listFunctions(dbName: String, pattern: String): DS[Function]
/**
* Returns a list of columns for the given table/view or temporary view.
@@ -119,10 +124,10 @@ abstract class Catalog {
* is either a qualified or unqualified name that designates a table/view.
It follows the same
* resolution rule with SQL: search for temp views first then table/views
in the current
* database (namespace).
- * @since 3.5.0
+ * @since 2.0.0
*/
@throws[AnalysisException]("table does not exist")
- def listColumns(tableName: String): Dataset[Column]
+ def listColumns(tableName: String): DS[Column]
/**
* Returns a list of columns for the given table/view in the specified
database under the Hive
@@ -135,16 +140,16 @@ abstract class Catalog {
* is an unqualified name that designates a database.
* @param tableName
* is an unqualified name that designates a table/view.
- * @since 3.5.0
+ * @since 2.0.0
*/
@throws[AnalysisException]("database or table does not exist")
- def listColumns(dbName: String, tableName: String): Dataset[Column]
+ def listColumns(dbName: String, tableName: String): DS[Column]
/**
* Get the database (namespace) with the specified name (can be qualified
with catalog). This
* throws an AnalysisException when the database (namespace) cannot be found.
*
- * @since 3.5.0
+ * @since 2.1.0
*/
@throws[AnalysisException]("database does not exist")
def getDatabase(dbName: String): Database
@@ -157,7 +162,7 @@ abstract class Catalog {
* is either a qualified or unqualified name that designates a table/view.
It follows the same
* resolution rule with SQL: search for temp views first then table/views
in the current
* database (namespace).
- * @since 3.5.0
+ * @since 2.1.0
*/
@throws[AnalysisException]("table does not exist")
def getTable(tableName: String): Table
@@ -169,7 +174,7 @@ abstract class Catalog {
* To get table/view in other catalogs, please use `getTable(tableName)`
with qualified
* table/view name instead.
*
- * @since 3.5.0
+ * @since 2.1.0
*/
@throws[AnalysisException]("database or table does not exist")
def getTable(dbName: String, tableName: String): Table
@@ -182,7 +187,7 @@ abstract class Catalog {
* is either a qualified or unqualified name that designates a function.
It follows the same
* resolution rule with SQL: search for built-in/temp functions first then
functions in the
* current database (namespace).
- * @since 3.5.0
+ * @since 2.1.0
*/
@throws[AnalysisException]("function does not exist")
def getFunction(functionName: String): Function
@@ -198,7 +203,7 @@ abstract class Catalog {
* is an unqualified name that designates a database.
* @param functionName
* is an unqualified name that designates a function in the specified
database
- * @since 3.5.0
+ * @since 2.1.0
*/
@throws[AnalysisException]("database or function does not exist")
def getFunction(dbName: String, functionName: String): Function
@@ -207,7 +212,7 @@ abstract class Catalog {
* Check if the database (namespace) with the specified name exists (the
name can be qualified
* with catalog).
*
- * @since 3.5.0
+ * @since 2.1.0
*/
def databaseExists(dbName: String): Boolean
@@ -219,7 +224,7 @@ abstract class Catalog {
* is either a qualified or unqualified name that designates a table/view.
It follows the same
* resolution rule with SQL: search for temp views first then table/views
in the current
* database (namespace).
- * @since 3.5.0
+ * @since 2.1.0
*/
def tableExists(tableName: String): Boolean
@@ -234,7 +239,7 @@ abstract class Catalog {
* is an unqualified name that designates a database.
* @param tableName
* is an unqualified name that designates a table.
- * @since 3.5.0
+ * @since 2.1.0
*/
def tableExists(dbName: String, tableName: String): Boolean
@@ -246,7 +251,7 @@ abstract class Catalog {
* is either a qualified or unqualified name that designates a function.
It follows the same
* resolution rule with SQL: search for built-in/temp functions first then
functions in the
* current database (namespace).
- * @since 3.5.0
+ * @since 2.1.0
*/
def functionExists(functionName: String): Boolean
@@ -261,7 +266,7 @@ abstract class Catalog {
* is an unqualified name that designates a database.
* @param functionName
* is an unqualified name that designates a function.
- * @since 3.5.0
+ * @since 2.1.0
*/
def functionExists(dbName: String, functionName: String): Boolean
@@ -272,10 +277,10 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.0.0
*/
@deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(tableName: String, path: String): DataFrame = {
+ def createExternalTable(tableName: String, path: String): DS[Row] = {
createTable(tableName, path)
}
@@ -286,9 +291,9 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.2.0
*/
- def createTable(tableName: String, path: String): DataFrame
+ def createTable(tableName: String, path: String): DS[Row]
/**
* Creates a table from the given path based on a data source and returns
the corresponding
@@ -297,10 +302,10 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.0.0
*/
@deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(tableName: String, path: String, source: String): DataFrame = {
+ def createExternalTable(tableName: String, path: String, source: String): DS[Row] = {
createTable(tableName, path, source)
}
@@ -311,9 +316,9 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.2.0
*/
- def createTable(tableName: String, path: String, source: String): DataFrame
+ def createTable(tableName: String, path: String, source: String): DS[Row]
/**
* Creates a table from the given path based on a data source and a set of
options. Then,
@@ -322,13 +327,13 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.0.0
*/
@deprecated("use createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
- options: java.util.Map[String, String]): DataFrame = {
+ options: util.Map[String, String]): DS[Row] = {
createTable(tableName, source, options)
}
@@ -339,12 +344,12 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.2.0
*/
def createTable(
tableName: String,
source: String,
- options: java.util.Map[String, String]): DataFrame = {
+ options: util.Map[String, String]): DS[Row] = {
createTable(tableName, source, options.asScala.toMap)
}
@@ -355,13 +360,13 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.0.0
*/
@deprecated("use createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
- options: Map[String, String]): DataFrame = {
+ options: Map[String, String]): DS[Row] = {
createTable(tableName, source, options)
}
@@ -372,9 +377,9 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.2.0
*/
- def createTable(tableName: String, source: String, options: Map[String, String]): DataFrame
+ def createTable(tableName: String, source: String, options: Map[String, String]): DS[Row]
/**
* Create a table from the given path based on a data source, a schema and a
set of options.
@@ -383,14 +388,14 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.0.0
*/
@deprecated("use createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
schema: StructType,
- options: java.util.Map[String, String]): DataFrame = {
+ options: util.Map[String, String]): DS[Row] = {
createTable(tableName, source, schema, options)
}
@@ -401,13 +406,13 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 3.1.0
*/
def createTable(
tableName: String,
source: String,
description: String,
- options: java.util.Map[String, String]): DataFrame = {
+ options: util.Map[String, String]): DS[Row] = {
createTable(
tableName,
source = source,
@@ -422,13 +427,13 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 3.1.0
*/
def createTable(
tableName: String,
source: String,
description: String,
- options: Map[String, String]): DataFrame
+ options: Map[String, String]): DS[Row]
/**
* Create a table based on the dataset in a data source, a schema and a set
of options. Then,
@@ -437,13 +442,13 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.2.0
*/
def createTable(
tableName: String,
source: String,
schema: StructType,
- options: java.util.Map[String, String]): DataFrame = {
+ options: util.Map[String, String]): DS[Row] = {
createTable(tableName, source, schema, options.asScala.toMap)
}
@@ -454,14 +459,14 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.0.0
*/
@deprecated("use createTable instead.", "2.2.0")
def createExternalTable(
tableName: String,
source: String,
schema: StructType,
- options: Map[String, String]): DataFrame = {
+ options: Map[String, String]): DS[Row] = {
createTable(tableName, source, schema, options)
}
@@ -472,13 +477,13 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.2.0
*/
def createTable(
tableName: String,
source: String,
schema: StructType,
- options: Map[String, String]): DataFrame
+ options: Map[String, String]): DS[Row]
/**
* Create a table based on the dataset in a data source, a schema and a set
of options. Then,
@@ -487,14 +492,14 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 3.1.0
*/
def createTable(
tableName: String,
source: String,
schema: StructType,
description: String,
- options: java.util.Map[String, String]): DataFrame = {
+ options: util.Map[String, String]): DS[Row] = {
createTable(
tableName,
source = source,
@@ -510,14 +515,14 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 3.1.0
*/
def createTable(
tableName: String,
source: String,
schema: StructType,
description: String,
- options: Map[String, String]): DataFrame
+ options: Map[String, String]): DS[Row]
/**
* Drops the local temporary view with the given view name in the catalog.
If the view has been
@@ -534,7 +539,7 @@ abstract class Catalog {
* the name of the temporary view to be dropped.
* @return
* true if the view is dropped successfully, false otherwise.
- * @since 3.5.0
+ * @since 2.0.0
*/
def dropTempView(viewName: String): Boolean
@@ -551,7 +556,7 @@ abstract class Catalog {
* the unqualified name of the temporary view to be dropped.
* @return
* true if the view is dropped successfully, false otherwise.
- * @since 3.5.0
+ * @since 2.1.0
*/
def dropGlobalTempView(viewName: String): Boolean
@@ -562,7 +567,7 @@ abstract class Catalog {
* @param tableName
* is either a qualified or unqualified name that designates a table. If
no database
* identifier is provided, it refers to a table in the current database.
- * @since 3.5.0
+ * @since 2.1.1
*/
def recoverPartitions(tableName: String): Unit
@@ -573,7 +578,7 @@ abstract class Catalog {
* is either a qualified or unqualified name that designates a table/view.
If no database
* identifier is provided, it refers to a temporary view or a table/view
in the current
* database.
- * @since 3.5.0
+ * @since 2.0.0
*/
def isCached(tableName: String): Boolean
@@ -584,7 +589,7 @@ abstract class Catalog {
* is either a qualified or unqualified name that designates a table/view.
If no database
* identifier is provided, it refers to a temporary view or a table/view
in the current
* database.
- * @since 3.5.0
+ * @since 2.0.0
*/
def cacheTable(tableName: String): Unit
@@ -597,7 +602,7 @@ abstract class Catalog {
* database.
* @param storageLevel
* storage level to cache table.
- * @since 3.5.0
+ * @since 2.3.0
*/
def cacheTable(tableName: String, storageLevel: StorageLevel): Unit
@@ -608,14 +613,14 @@ abstract class Catalog {
* is either a qualified or unqualified name that designates a table/view.
If no database
* identifier is provided, it refers to a temporary view or a table/view
in the current
* database.
- * @since 3.5.0
+ * @since 2.0.0
*/
def uncacheTable(tableName: String): Unit
/**
* Removes all cached tables from the in-memory cache.
*
- * @since 3.5.0
+ * @since 2.0.0
*/
def clearCache(): Unit
@@ -632,39 +637,40 @@ abstract class Catalog {
* is either a qualified or unqualified name that designates a table/view.
If no database
* identifier is provided, it refers to a temporary view or a table/view
in the current
* database.
- * @since 3.5.0
+ * @since 2.0.0
*/
def refreshTable(tableName: String): Unit
/**
* Invalidates and refreshes all the cached data (and the associated metadata) for any `Dataset`
- * that contains the given data source path. Path matching is by prefix, i.e. "/" would
- * invalidate everything that is cached.
+ * that contains the given data source path. Path matching is by checking for sub-directories,
+ * i.e. "/" would invalidate everything that is cached and "/test/parent" would invalidate
+ * everything that is a subdirectory of "/test/parent".
*
- * @since 3.5.0
+ * @since 2.0.0
*/
def refreshByPath(path: String): Unit
/**
* Returns the current catalog in this session.
*
- * @since 3.5.0
+ * @since 3.4.0
*/
def currentCatalog(): String
/**
* Sets the current catalog in this session.
*
- * @since 3.5.0
+ * @since 3.4.0
*/
def setCurrentCatalog(catalogName: String): Unit
/**
* Returns a list of catalogs available in this session.
*
- * @since 3.5.0
+ * @since 3.4.0
*/
- def listCatalogs(): Dataset[CatalogMetadata]
+ def listCatalogs(): DS[CatalogMetadata]
/**
* Returns a list of catalogs which name match the specify pattern and
available in this
@@ -672,5 +678,5 @@ abstract class Catalog {
*
* @since 3.5.0
*/
- def listCatalogs(pattern: String): Dataset[CatalogMetadata]
+ def listCatalogs(pattern: String): DS[CatalogMetadata]
}
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/api/SparkSession.scala b/sql/api/src/main/scala/org/apache/spark/sql/api/SparkSession.scala
index ef7c33d95f61..cf502c746d24 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/api/SparkSession.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/api/SparkSession.scala
@@ -227,6 +227,14 @@ abstract class SparkSession[DS[U] <: Dataset[U, DS]] extends Serializable with C
| Catalog-related methods |
* ------------------------- */
+ /**
+ * Interface through which the user may create, drop, alter or query underlying databases,
+ * tables, functions etc.
+ *
+ * @since 2.0.0
+ */
+ def catalog: Catalog[DS]
+
/**
* Returns the specified table/view as a `DataFrame`. If it's a table, it
must support batch
* reading and the returned DataFrame is the batch scan query plan of this
table. If it's a
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/interface.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalog/interface.scala
similarity index 95%
rename from connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/interface.scala
rename to sql/api/src/main/scala/org/apache/spark/sql/catalog/interface.scala
index 33e9007ac7e2..3a3ba9d26132 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/interface.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalog/interface.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalog
import javax.annotation.Nullable
+import org.apache.spark.annotation.Stable
import org.apache.spark.sql.catalyst.DefinedByConstructorParams
// Note: all classes here are expected to be wrapped in Datasets and so must extend
@@ -31,16 +32,13 @@ import org.apache.spark.sql.catalyst.DefinedByConstructorParams
* name of the catalog
* @param description
* description of the catalog
- * @since 3.5.0
+ * @since 3.4.0
*/
class CatalogMetadata(val name: String, @Nullable val description: String)
extends DefinedByConstructorParams {
- override def toString: String = {
- "Catalog[" +
- s"name='$name', " +
- Option(description).map { d => s"description='$d'] " }.getOrElse("]")
- }
+ override def toString: String =
+ s"Catalog[name='$name', ${Option(description).map(d =>
s"description='$d'").getOrElse("")}]"
}
/**
@@ -54,8 +52,9 @@ class CatalogMetadata(val name: String, @Nullable val description: String)
* description of the database.
* @param locationUri
* path (in the form of a uri) to data files.
- * @since 3.5.0
+ * @since 2.0.0
*/
+@Stable
class Database(
val name: String,
@Nullable val catalog: String,
@@ -92,8 +91,9 @@ class Database(
* type of the table (e.g. view, table).
* @param isTemporary
* whether the table is a temporary table.
- * @since 3.5.0
+ * @since 2.0.0
*/
+@Stable
class Table(
val name: String,
@Nullable val catalog: String,
@@ -155,8 +155,9 @@ class Table(
* whether the column is a bucket column.
* @param isCluster
* whether the column is a clustering column.
- * @since 3.5.0
+ * @since 2.0.0
*/
+@Stable
class Column(
val name: String,
@Nullable val description: String,
@@ -205,8 +206,9 @@ class Column(
* the fully qualified class name of the function.
* @param isTemporary
* whether the function is a temporary function or not.
- * @since 3.5.0
+ * @since 2.0.0
*/
+@Stable
class Function(
val name: String,
@Nullable val catalog: String,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 906e5ede8b4e..a7fb71d95d14 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -480,12 +480,7 @@ class SparkSession private(
| Catalog-related methods |
* ------------------------- */
- /**
- * Interface through which the user may create, drop, alter or query underlying
- * databases, tables, functions etc.
- *
- * @since 2.0.0
- */
+ /** @inheritdoc */
@transient lazy val catalog: Catalog = new CatalogImpl(self)
/** @inheritdoc */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
index 676be7fe41cb..661e43fe73ca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
@@ -14,657 +14,152 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.spark.sql.catalog
-import scala.jdk.CollectionConverters._
+import java.util
-import org.apache.spark.annotation.Stable
-import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset}
+import org.apache.spark.sql.{api, DataFrame, Dataset}
import org.apache.spark.sql.types.StructType
-import org.apache.spark.storage.StorageLevel
-
-/**
- * Catalog interface for Spark. To access this, use `SparkSession.catalog`.
- *
- * @since 2.0.0
- */
-@Stable
-abstract class Catalog {
-
- /**
- * Returns the current database (namespace) in this session.
- *
- * @since 2.0.0
- */
- def currentDatabase: String
-
- /**
- * Sets the current database (namespace) in this session.
- *
- * @since 2.0.0
- */
- def setCurrentDatabase(dbName: String): Unit
-
- /**
- * Returns a list of databases (namespaces) available within the current
catalog.
- *
- * @since 2.0.0
- */
- def listDatabases(): Dataset[Database]
-
- /**
- * Returns a list of databases (namespaces) which name match the specify
pattern and
- * available within the current catalog.
- *
- * @since 3.5.0
- */
- def listDatabases(pattern: String): Dataset[Database]
-
- /**
- * Returns a list of tables/views in the current database (namespace).
- * This includes all temporary views.
- *
- * @since 2.0.0
- */
- def listTables(): Dataset[Table]
-
- /**
- * Returns a list of tables/views in the specified database (namespace) (the
name can be qualified
- * with catalog).
- * This includes all temporary views.
- *
- * @since 2.0.0
- */
- @throws[AnalysisException]("database does not exist")
- def listTables(dbName: String): Dataset[Table]
-
- /**
- * Returns a list of tables/views in the specified database (namespace)
- * which name match the specify pattern (the name can be qualified with
catalog).
- * This includes all temporary views.
- *
- * @since 3.5.0
- */
- @throws[AnalysisException]("database does not exist")
- def listTables(dbName: String, pattern: String): Dataset[Table]
-
- /**
- * Returns a list of functions registered in the current database
(namespace).
- * This includes all temporary functions.
- *
- * @since 2.0.0
- */
- def listFunctions(): Dataset[Function]
-
- /**
- * Returns a list of functions registered in the specified database
(namespace) (the name can be
- * qualified with catalog).
- * This includes all built-in and temporary functions.
- *
- * @since 2.0.0
- */
- @throws[AnalysisException]("database does not exist")
- def listFunctions(dbName: String): Dataset[Function]
-
- /**
- * Returns a list of functions registered in the specified database
(namespace)
- * which name match the specify pattern (the name can be qualified with
catalog).
- * This includes all built-in and temporary functions.
- *
- * @since 3.5.0
- */
- @throws[AnalysisException]("database does not exist")
- def listFunctions(dbName: String, pattern: String): Dataset[Function]
- /**
- * Returns a list of columns for the given table/view or temporary view.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table/view. It
- * follows the same resolution rule with SQL: search for
temp views first then
- * table/views in the current database (namespace).
- * @since 2.0.0
- */
- @throws[AnalysisException]("table does not exist")
- def listColumns(tableName: String): Dataset[Column]
+/** @inheritdoc */
+abstract class Catalog extends api.Catalog[Dataset] {
+ /** @inheritdoc */
+ override def listDatabases(): Dataset[Database]
- /**
- * Returns a list of columns for the given table/view in the specified
database under the Hive
- * Metastore.
- *
- * To list columns for table/view in other catalogs, please use
`listColumns(tableName)` with
- * qualified table/view name instead.
- *
- * @param dbName is an unqualified name that designates a database.
- * @param tableName is an unqualified name that designates a table/view.
- * @since 2.0.0
- */
- @throws[AnalysisException]("database or table does not exist")
- def listColumns(dbName: String, tableName: String): Dataset[Column]
+ /** @inheritdoc */
+ override def listDatabases(pattern: String): Dataset[Database]
- /**
- * Get the database (namespace) with the specified name (can be qualified
with catalog). This
- * throws an AnalysisException when the database (namespace) cannot be found.
- *
- * @since 2.1.0
- */
- @throws[AnalysisException]("database does not exist")
- def getDatabase(dbName: String): Database
+ /** @inheritdoc */
+ override def listTables(): Dataset[Table]
- /**
- * Get the table or view with the specified name. This table can be a
temporary view or a
- * table/view. This throws an AnalysisException when no Table can be found.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table/view. It
- * follows the same resolution rule with SQL: search for
temp views first then
- * table/views in the current database (namespace).
- * @since 2.1.0
- */
- @throws[AnalysisException]("table does not exist")
- def getTable(tableName: String): Table
+ /** @inheritdoc */
+ override def listTables(dbName: String): Dataset[Table]
- /**
- * Get the table or view with the specified name in the specified database
under the Hive
- * Metastore. This throws an AnalysisException when no Table can be found.
- *
- * To get table/view in other catalogs, please use `getTable(tableName)`
with qualified table/view
- * name instead.
- *
- * @since 2.1.0
- */
- @throws[AnalysisException]("database or table does not exist")
- def getTable(dbName: String, tableName: String): Table
+ /** @inheritdoc */
+ override def listTables(dbName: String, pattern: String): Dataset[Table]
- /**
- * Get the function with the specified name. This function can be a
temporary function or a
- * function. This throws an AnalysisException when the function cannot be
found.
- *
- * @param functionName is either a qualified or unqualified name that
designates a function. It
- * follows the same resolution rule with SQL: search for
built-in/temp
- * functions first then functions in the current
database (namespace).
- * @since 2.1.0
- */
- @throws[AnalysisException]("function does not exist")
- def getFunction(functionName: String): Function
+ /** @inheritdoc */
+ override def listFunctions(): Dataset[Function]
- /**
- * Get the function with the specified name in the specified database under
the Hive Metastore.
- * This throws an AnalysisException when the function cannot be found.
- *
- * To get functions in other catalogs, please use
`getFunction(functionName)` with qualified
- * function name instead.
- *
- * @param dbName is an unqualified name that designates a database.
- * @param functionName is an unqualified name that designates a function in
the specified database
- * @since 2.1.0
- */
- @throws[AnalysisException]("database or function does not exist")
- def getFunction(dbName: String, functionName: String): Function
+ /** @inheritdoc */
+ override def listFunctions(dbName: String): Dataset[Function]
- /**
- * Check if the database (namespace) with the specified name exists (the
name can be qualified
- * with catalog).
- *
- * @since 2.1.0
- */
- def databaseExists(dbName: String): Boolean
+ /** @inheritdoc */
+ override def listFunctions(dbName: String, pattern: String): Dataset[Function]
- /**
- * Check if the table or view with the specified name exists. This can
either be a temporary
- * view or a table/view.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table/view. It
- * follows the same resolution rule with SQL: search for
temp views first then
- * table/views in the current database (namespace).
- * @since 2.1.0
- */
- def tableExists(tableName: String): Boolean
+ /** @inheritdoc */
+ override def listColumns(tableName: String): Dataset[Column]
- /**
- * Check if the table or view with the specified name exists in the
specified database under the
- * Hive Metastore.
- *
- * To check existence of table/view in other catalogs, please use
`tableExists(tableName)` with
- * qualified table/view name instead.
- *
- * @param dbName is an unqualified name that designates a database.
- * @param tableName is an unqualified name that designates a table.
- * @since 2.1.0
- */
- def tableExists(dbName: String, tableName: String): Boolean
+ /** @inheritdoc */
+ override def listColumns(dbName: String, tableName: String): Dataset[Column]
- /**
- * Check if the function with the specified name exists. This can either be
a temporary function
- * or a function.
- *
- * @param functionName is either a qualified or unqualified name that
designates a function. It
- * follows the same resolution rule with SQL: search for
built-in/temp
- * functions first then functions in the current
database (namespace).
- * @since 2.1.0
- */
- def functionExists(functionName: String): Boolean
+ /** @inheritdoc */
+ override def createTable(tableName: String, path: String): DataFrame
- /**
- * Check if the function with the specified name exists in the specified
database under the
- * Hive Metastore.
- *
- * To check existence of functions in other catalogs, please use
`functionExists(functionName)`
- * with qualified function name instead.
- *
- * @param dbName is an unqualified name that designates a database.
- * @param functionName is an unqualified name that designates a function.
- * @since 2.1.0
- */
- def functionExists(dbName: String, functionName: String): Boolean
+ /** @inheritdoc */
+ override def createTable(tableName: String, path: String, source: String): DataFrame
- /**
- * Creates a table from the given path and returns the corresponding
DataFrame.
- * It will use the default data source configured by
spark.sql.sources.default.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.0.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(tableName: String, path: String): DataFrame = {
- createTable(tableName, path)
- }
-
- /**
- * Creates a table from the given path and returns the corresponding
DataFrame.
- * It will use the default data source configured by
spark.sql.sources.default.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.2.0
- */
- def createTable(tableName: String, path: String): DataFrame
-
- /**
- * Creates a table from the given path based on a data source and returns
the corresponding
- * DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.0.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(tableName: String, path: String, source: String): DataFrame = {
- createTable(tableName, path, source)
- }
-
- /**
- * Creates a table from the given path based on a data source and returns
the corresponding
- * DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.2.0
- */
- def createTable(tableName: String, path: String, source: String): DataFrame
-
- /**
- * Creates a table from the given path based on a data source and a set of
options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.0.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(tableName, source, options)
- }
+ options: Map[String, String]): DataFrame
- /**
- * Creates a table based on the dataset in a data source and a set of
options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.2.0
- */
- def createTable(
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(tableName, source, options.asScala.toMap)
- }
+ description: String,
+ options: Map[String, String]): DataFrame
- /**
- * (Scala-specific)
- * Creates a table from the given path based on a data source and a set of
options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.0.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
- options: Map[String, String]): DataFrame = {
- createTable(tableName, source, options)
- }
+ schema: StructType,
+ options: Map[String, String]): DataFrame
- /**
- * (Scala-specific)
- * Creates a table based on the dataset in a data source and a set of
options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.2.0
- */
- def createTable(
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
+ schema: StructType,
+ description: String,
options: Map[String, String]): DataFrame
- /**
- * Create a table from the given path based on a data source, a schema and a
set of options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.0.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(
+ /** @inheritdoc */
+ override def listCatalogs(): Dataset[CatalogMetadata]
+
+ /** @inheritdoc */
+ override def listCatalogs(pattern: String): Dataset[CatalogMetadata]
+
+ /** @inheritdoc */
+ override def createExternalTable(tableName: String, path: String): DataFrame =
+ super.createExternalTable(tableName, path)
+
+ /** @inheritdoc */
+ override def createExternalTable(tableName: String, path: String, source: String): DataFrame =
+ super.createExternalTable(tableName, path, source)
+
+ /** @inheritdoc */
+ override def createExternalTable(
tableName: String,
source: String,
- schema: StructType,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(tableName, source, schema, options)
- }
+ options: util.Map[String, String]): DataFrame =
+ super.createExternalTable(tableName, source, options)
- /**
- * Creates a table based on the dataset in a data source and a set of
options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 3.1.0
- */
- def createTable(
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
- description: String,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(
- tableName,
- source = source,
- description = description,
- options = options.asScala.toMap
- )
- }
+ options: util.Map[String, String]): DataFrame =
+ super.createTable(tableName, source, options)
- /**
- * (Scala-specific)
- * Creates a table based on the dataset in a data source and a set of
options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 3.1.0
- */
- def createTable(
+ /** @inheritdoc */
+ override def createExternalTable(
tableName: String,
source: String,
- description: String,
- options: Map[String, String]): DataFrame
+ options: Map[String, String]): DataFrame =
+ super.createExternalTable(tableName, source, options)
- /**
- * Create a table based on the dataset in a data source, a schema and a set
of options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.2.0
- */
- def createTable(
+ /** @inheritdoc */
+ override def createExternalTable(
tableName: String,
source: String,
schema: StructType,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(tableName, source, schema, options.asScala.toMap)
- }
+ options: util.Map[String, String]): DataFrame =
+ super.createExternalTable(tableName, source, schema, options)
- /**
- * (Scala-specific)
- * Create a table from the given path based on a data source, a schema and a
set of options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.0.0
- */
- @deprecated("use createTable instead.", "2.2.0")
- def createExternalTable(
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
- schema: StructType,
- options: Map[String, String]): DataFrame = {
- createTable(tableName, source, schema, options)
- }
+ description: String,
+ options: util.Map[String, String]): DataFrame =
+ super.createTable(tableName, source, description, options)
- /**
- * (Scala-specific)
- * Create a table based on the dataset in a data source, a schema and a set
of options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 2.2.0
- */
- def createTable(
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
schema: StructType,
- options: Map[String, String]): DataFrame
+ options: util.Map[String, String]): DataFrame =
+ super.createTable(tableName, source, schema, options)
- /**
- * Create a table based on the dataset in a data source, a schema and a set
of options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 3.1.0
- */
- def createTable(
+ /** @inheritdoc */
+ override def createExternalTable(
tableName: String,
source: String,
schema: StructType,
- description: String,
- options: java.util.Map[String, String]): DataFrame = {
- createTable(
- tableName,
- source = source,
- schema = schema,
- description = description,
- options = options.asScala.toMap
- )
- }
+ options: Map[String, String]): DataFrame =
+ super.createExternalTable(tableName, source, schema, options)
- /**
- * (Scala-specific)
- * Create a table based on the dataset in a data source, a schema and a set
of options.
- * Then, returns the corresponding DataFrame.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in
- * the current database.
- * @since 3.1.0
- */
- def createTable(
+ /** @inheritdoc */
+ override def createTable(
tableName: String,
source: String,
schema: StructType,
description: String,
- options: Map[String, String]): DataFrame
-
- /**
- * Drops the local temporary view with the given view name in the catalog.
- * If the view has been cached before, then it will also be uncached.
- *
- * Local temporary view is session-scoped. Its lifetime is the lifetime of
the session that
- * created it, i.e. it will be automatically dropped when the session
terminates. It's not
- * tied to any databases, i.e. we can't use `db1.view1` to reference a local
temporary view.
- *
- * Note that, the return type of this method was Unit in Spark 2.0, but
changed to Boolean
- * in Spark 2.1.
- *
- * @param viewName the name of the temporary view to be dropped.
- * @return true if the view is dropped successfully, false otherwise.
- * @since 2.0.0
- */
- def dropTempView(viewName: String): Boolean
-
- /**
- * Drops the global temporary view with the given view name in the catalog.
- * If the view has been cached before, then it will also be uncached.
- *
- * Global temporary view is cross-session. Its lifetime is the lifetime of
the Spark application,
- * i.e. it will be automatically dropped when the application terminates.
It's tied to a system
- * preserved database `global_temp`, and we must use the qualified name to
refer a global temp
- * view, e.g. `SELECT * FROM global_temp.view1`.
- *
- * @param viewName the unqualified name of the temporary view to be dropped.
- * @return true if the view is dropped successfully, false otherwise.
- * @since 2.1.0
- */
- def dropGlobalTempView(viewName: String): Boolean
-
- /**
- * Recovers all the partitions in the directory of a table and update the
catalog.
- * Only works with a partitioned table, and not a view.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table.
- * If no database identifier is provided, it refers to a
table in the
- * current database.
- * @since 2.1.1
- */
- def recoverPartitions(tableName: String): Unit
-
- /**
- * Returns true if the table is currently cached in-memory.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table/view.
- * If no database identifier is provided, it refers to a
temporary view or
- * a table/view in the current database.
- * @since 2.0.0
- */
- def isCached(tableName: String): Boolean
-
- /**
- * Caches the specified table in-memory.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table/view.
- * If no database identifier is provided, it refers to a
temporary view or
- * a table/view in the current database.
- * @since 2.0.0
- */
- def cacheTable(tableName: String): Unit
-
- /**
- * Caches the specified table with the given storage level.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table/view.
- * If no database identifier is provided, it refers to a
temporary view or
- * a table/view in the current database.
- * @param storageLevel storage level to cache table.
- * @since 2.3.0
- */
- def cacheTable(tableName: String, storageLevel: StorageLevel): Unit
-
-
- /**
- * Removes the specified table from the in-memory cache.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table/view.
- * If no database identifier is provided, it refers to a
temporary view or
- * a table/view in the current database.
- * @since 2.0.0
- */
- def uncacheTable(tableName: String): Unit
-
- /**
- * Removes all cached tables from the in-memory cache.
- *
- * @since 2.0.0
- */
- def clearCache(): Unit
-
- /**
- * Invalidates and refreshes all the cached data and metadata of the given
table. For performance
- * reasons, Spark SQL or the external data source library it uses might
cache certain metadata
- * about a table, such as the location of blocks. When those change outside
of Spark SQL, users
- * should call this function to invalidate the cache.
- *
- * If this table is cached as an InMemoryRelation, drop the original cached
version and make the
- * new version cached lazily.
- *
- * @param tableName is either a qualified or unqualified name that
designates a table/view.
- * If no database identifier is provided, it refers to a
temporary view or
- * a table/view in the current database.
- * @since 2.0.0
- */
- def refreshTable(tableName: String): Unit
-
- /**
- * Invalidates and refreshes all the cached data (and the associated
metadata) for any `Dataset`
- * that contains the given data source path. Path matching is by checking
for sub-directories,
- * i.e. "/" would invalidate everything that is cached and "/test/parent"
would invalidate
- * everything that is a subdirectory of "/test/parent".
- *
- * @since 2.0.0
- */
- def refreshByPath(path: String): Unit
-
- /**
- * Returns the current catalog in this session.
- *
- * @since 3.4.0
- */
- def currentCatalog(): String
-
- /**
- * Sets the current catalog in this session.
- *
- * @since 3.4.0
- */
- def setCurrentCatalog(catalogName: String): Unit
-
- /**
- * Returns a list of catalogs available in this session.
- *
- * @since 3.4.0
- */
- def listCatalogs(): Dataset[CatalogMetadata]
-
- /**
- * Returns a list of catalogs which name match the specify pattern and
available in this session.
- *
- * @since 3.5.0
- */
- def listCatalogs(pattern: String): Dataset[CatalogMetadata]
+ options: util.Map[String, String]): DataFrame =
+ super.createTable(tableName, source, schema, description, options)
}
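
The shape of the refactored class above is worth calling out: the Scala Map overloads stay abstract (presumably still implemented by CatalogImpl), while the java.util.Map overloads and the deprecated createExternalTable variants become thin forwarders to the shared api.Catalog parent, so the Java-to-Scala conversion is presumably done once in sql/api instead of being repeated per implementation. A minimal sketch of calling both flavours (illustrative only, not part of this commit; it assumes a local SparkSession and uses a made-up table name, format and path):

    import scala.jdk.CollectionConverters._
    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.types.{LongType, StructField, StructType}

    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    val schema  = StructType(Seq(StructField("id", LongType)))
    val options = Map("path" -> "/tmp/demo_tbl")   // hypothetical location

    // Scala-friendly overload (declared abstract in the class above):
    spark.catalog.createTable("demo_tbl", "parquet", schema, options)

    // Java-friendly overload; per the diff it forwards to super.createTable(...):
    spark.catalog.createTable("demo_tbl_java", "parquet", schema, options.asJava)
    spark.stop()

Either call produces the same DataFrame-returning behaviour; only the map type at the call site differs.
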
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala
b/sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala
deleted file mode 100644
index 31aee5a43ef4..000000000000
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalog
-
-import javax.annotation.Nullable
-
-import org.apache.spark.annotation.Stable
-import org.apache.spark.sql.catalyst.DefinedByConstructorParams
-
-
-// Note: all classes here are expected to be wrapped in Datasets and so must
extend
-// DefinedByConstructorParams for the catalog to be able to create encoders
for them.
-
-/**
- * A catalog in Spark, as returned by the `listCatalogs` method defined in
[[Catalog]].
- *
- * @param name name of the catalog
- * @param description description of the catalog
- * @since 3.4.0
- */
-class CatalogMetadata(
- val name: String,
- @Nullable val description: String)
- extends DefinedByConstructorParams {
-
- override def toString: String =
- s"Catalog[name='$name', ${Option(description).map(d =>
s"description='$d'").getOrElse("")}]"
-}
-
-/**
- * A database in Spark, as returned by the `listDatabases` method defined in
[[Catalog]].
- *
- * @param name name of the database.
- * @param catalog name of the catalog that the table belongs to.
- * @param description description of the database.
- * @param locationUri path (in the form of a uri) to data files.
- * @since 2.0.0
- */
-@Stable
-class Database(
- val name: String,
- @Nullable val catalog: String,
- @Nullable val description: String,
- val locationUri: String)
- extends DefinedByConstructorParams {
-
- def this(name: String, description: String, locationUri: String) = {
- this(name, null, description, locationUri)
- }
-
- override def toString: String = {
- "Database[" +
- s"name='$name', " +
- Option(catalog).map { c => s"catalog='$c', " }.getOrElse("") +
- Option(description).map { d => s"description='$d', " }.getOrElse("") +
- s"path='$locationUri']"
- }
-
-}
-
-
-/**
- * A table in Spark, as returned by the `listTables` method in [[Catalog]].
- *
- * @param name name of the table.
- * @param catalog name of the catalog that the table belongs to.
- * @param namespace the namespace that the table belongs to.
- * @param description description of the table.
- * @param tableType type of the table (e.g. view, table).
- * @param isTemporary whether the table is a temporary table.
- * @since 2.0.0
- */
-@Stable
-class Table(
- val name: String,
- @Nullable val catalog: String,
- @Nullable val namespace: Array[String],
- @Nullable val description: String,
- val tableType: String,
- val isTemporary: Boolean)
- extends DefinedByConstructorParams {
-
- if (namespace != null) {
- assert(namespace.forall(_ != null))
- }
-
- def this(
- name: String,
- database: String,
- description: String,
- tableType: String,
- isTemporary: Boolean) = {
- this(name, null, if (database != null) Array(database) else null,
- description, tableType, isTemporary)
- }
-
- def database: String = {
- if (namespace != null && namespace.length == 1) namespace(0) else null
- }
-
- override def toString: String = {
- "Table[" +
- s"name='$name', " +
- Option(catalog).map { d => s"catalog='$d', " }.getOrElse("") +
- Option(database).map { d => s"database='$d', " }.getOrElse("") +
- Option(description).map { d => s"description='$d', " }.getOrElse("") +
- s"tableType='$tableType', " +
- s"isTemporary='$isTemporary']"
- }
-
-}
-
-
-/**
- * A column in Spark, as returned by `listColumns` method in [[Catalog]].
- *
- * @param name name of the column.
- * @param description description of the column.
- * @param dataType data type of the column.
- * @param nullable whether the column is nullable.
- * @param isPartition whether the column is a partition column.
- * @param isBucket whether the column is a bucket column.
- * @param isCluster whether the column is a clustering column.
- * @since 2.0.0
- */
-@Stable
-class Column(
- val name: String,
- @Nullable val description: String,
- val dataType: String,
- val nullable: Boolean,
- val isPartition: Boolean,
- val isBucket: Boolean,
- val isCluster: Boolean)
- extends DefinedByConstructorParams {
-
- def this(
- name: String,
- description: String,
- dataType: String,
- nullable: Boolean,
- isPartition: Boolean,
- isBucket: Boolean) = {
- this(name, description, dataType, nullable, isPartition, isBucket, isCluster = false)
- }
-
- override def toString: String = {
- "Column[" +
- s"name='$name', " +
- Option(description).map { d => s"description='$d', " }.getOrElse("") +
- s"dataType='$dataType', " +
- s"nullable='$nullable', " +
- s"isPartition='$isPartition', " +
- s"isBucket='$isBucket', " +
- s"isCluster='$isCluster']"
- }
-
-}
-
-
-/**
- * A user-defined function in Spark, as returned by `listFunctions` method in
[[Catalog]].
- *
- * @param name name of the function.
- * @param catalog name of the catalog that the table belongs to.
- * @param namespace the namespace that the table belongs to.
- * @param description description of the function; description can be null.
- * @param className the fully qualified class name of the function.
- * @param isTemporary whether the function is a temporary function or not.
- * @since 2.0.0
- */
-@Stable
-class Function(
- val name: String,
- @Nullable val catalog: String,
- @Nullable val namespace: Array[String],
- @Nullable val description: String,
- val className: String,
- val isTemporary: Boolean)
- extends DefinedByConstructorParams {
-
- if (namespace != null) {
- assert(namespace.forall(_ != null))
- }
-
- def this(
- name: String,
- database: String,
- description: String,
- className: String,
- isTemporary: Boolean) = {
- this(name, null, if (database != null) Array(database) else null,
- description, className, isTemporary)
- }
-
- def database: String = {
- if (namespace != null && namespace.length == 1) namespace(0) else null
- }
-
- override def toString: String = {
- "Function[" +
- s"name='$name', " +
- Option(database).map { d => s"database='$d', " }.getOrElse("") +
- Option(description).map { d => s"description='$d', " }.getOrElse("") +
- s"className='$className', " +
- s"isTemporary='$isTemporary']"
- }
-
-}
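
Although sql/core's interface.scala is deleted outright here, the refactored Catalog earlier in this diff still returns Database, Table, Column, Function and CatalogMetadata, so those classes presumably now live once in the shared sql/api module; user code keeps importing them from org.apache.spark.sql.catalog as before. A minimal sketch of consuming them (illustrative only, not part of this commit):

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.catalog.Table

    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    val tables: Array[Table] = spark.catalog.listTables().collect()
    tables.foreach { t =>
      // name, tableType and isTemporary are the same fields defined above.
      println(s"${t.name} type=${t.tableType} temporary=${t.isTemporary}")
    }
    spark.stop()
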
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]