Repository: spark
Updated Branches:
  refs/heads/master d5c79f564 -> ca1b21985


[SPARK-15052][SQL] Use builder pattern to create SparkSession

## What changes were proposed in this pull request?
This patch creates a builder pattern for creating SparkSession. The new code is 
unused and mostly deadcode. I'm putting it up here for feedback.

There are a few TODOs that can be done as follow-up pull requests:
- [ ] Update tests to use this
- [ ] Update examples to use this
- [ ] Clean up SQLContext code w.r.t. this one (i.e. SparkSession shouldn't 
call into SQLContext.getOrCreate; it should be the other way around)
- [ ] Remove SparkSession.withHiveSupport
- [ ] Disable the old constructor (by making it private) so the only way to 
start a SparkSession is through this builder pattern

## How was this patch tested?
Part of the future pull request is to clean this up and switch existing tests 
to use this.

Author: Reynold Xin <[email protected]>

Closes #12830 from rxin/sparksession-builder.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca1b2198
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca1b2198
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca1b2198

Branch: refs/heads/master
Commit: ca1b2198581b8de1651a88fc97540570a2347dc9
Parents: d5c79f5
Author: Reynold Xin <[email protected]>
Authored: Mon May 2 15:27:16 2016 -0700
Committer: Reynold Xin <[email protected]>
Committed: Mon May 2 15:27:16 2016 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/SparkSession.scala     | 131 ++++++++++++++++++-
 1 file changed, 127 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ca1b2198/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 4c2a7b8..3836ce2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -18,9 +18,7 @@
 package org.apache.spark.sql
 
 import java.beans.Introspector
-import java.util.Properties
 
-import scala.collection.immutable
 import scala.collection.JavaConverters._
 import scala.reflect.ClassTag
 import scala.reflect.runtime.universe.TypeTag
@@ -30,7 +28,7 @@ import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.internal.Logging
-import org.apache.spark.internal.config.{CATALOG_IMPLEMENTATION, ConfigEntry}
+import org.apache.spark.internal.config.CATALOG_IMPLEMENTATION
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalog.Catalog
 import org.apache.spark.sql.catalyst._
@@ -49,7 +47,16 @@ import org.apache.spark.util.Utils
 
 
 /**
- * The entry point to Spark execution.
+ * The entry point to programming Spark with the Dataset and DataFrame API.
+ *
+ * To create a SparkSession, use the following builder pattern:
+ *
+ * {{{
+ *   SparkSession.builder()
+ *     .master("local")
+ *     .config("spark.some.config.option", "some-value").
+ *     .getOrCreate()
+ * }}}
  */
 class SparkSession private(
     @transient val sparkContext: SparkContext,
@@ -635,6 +642,122 @@ class SparkSession private(
 
 object SparkSession {
 
+  /**
+   * Builder for [[SparkSession]].
+   */
+  class Builder {
+
+    private[this] val options = new scala.collection.mutable.HashMap[String, 
String]
+
+    /**
+     * Sets a name for the application, which will be shown in the Spark web 
UI.
+     *
+     * @since 2.0.0
+     */
+    def appName(name: String): Builder = config("spark.app.name", name)
+
+    /**
+     * Sets a config option. Options set using this method are automatically 
propagated to
+     * both [[SparkConf]] and SparkSession's own configuration.
+     *
+     * @since 2.0.0
+     */
+    def config(key: String, value: String): Builder = synchronized {
+      options += key -> value
+      this
+    }
+
+    /**
+     * Sets a config option. Options set using this method are automatically 
propagated to
+     * both [[SparkConf]] and SparkSession's own configuration.
+     *
+     * @since 2.0.0
+     */
+    def config(key: String, value: Long): Builder = synchronized {
+      options += key -> value.toString
+      this
+    }
+
+    /**
+     * Sets a config option. Options set using this method are automatically 
propagated to
+     * both [[SparkConf]] and SparkSession's own configuration.
+     *
+     * @since 2.0.0
+     */
+    def config(key: String, value: Double): Builder = synchronized {
+      options += key -> value.toString
+      this
+    }
+
+    /**
+     * Sets a config option. Options set using this method are automatically 
propagated to
+     * both [[SparkConf]] and SparkSession's own configuration.
+     *
+     * @since 2.0.0
+     */
+    def config(key: String, value: Boolean): Builder = synchronized {
+      options += key -> value.toString
+      this
+    }
+
+    /**
+     * Sets a list of config options based on the given [[SparkConf]].
+     *
+     * @since 2.0.0
+     */
+    def config(conf: SparkConf): Builder = synchronized {
+      conf.getAll.foreach { case (k, v) => options += k -> v }
+      this
+    }
+
+    /**
+     * Sets the Spark master URL to connect to, such as "local" to run 
locally, "local[4]" to
+     * run locally with 4 cores, or "spark://master:7077" to run on a Spark 
standalone cluster.
+     *
+     * @since 2.0.0
+     */
+    def master(master: String): Builder = config("spark.master", master)
+
+    /**
+     * Enables Hive support, including connectivity to a persistent Hive 
metastore, support for
+     * Hive serdes, and Hive user-defined functions.
+     *
+     * @since 2.0.0
+     */
+    def enableHiveSupport(): Builder = synchronized {
+      if (hiveClassesArePresent) {
+        config(CATALOG_IMPLEMENTATION.key, "hive")
+      } else {
+        throw new IllegalArgumentException(
+          "Unable to instantiate SparkSession with Hive support because " +
+            "Hive classes are not found.")
+      }
+    }
+
+    /**
+     * Gets an existing [[SparkSession]] or, if there is no existing one, 
creates a new one
+     * based on the options set in this builder.
+     *
+     * @since 2.0.0
+     */
+    def getOrCreate(): SparkSession = synchronized {
+      // Step 1. Create a SparkConf
+      // Step 2. Get a SparkContext
+      // Step 3. Get a SparkSession
+      val sparkConf = new SparkConf()
+      options.foreach { case (k, v) => sparkConf.set(k, v) }
+      val sparkContext = SparkContext.getOrCreate(sparkConf)
+
+      SQLContext.getOrCreate(sparkContext).sparkSession
+    }
+  }
+
+  /**
+   * Creates a [[SparkSession.Builder]] for constructing a [[SparkSession]].
+   * @since 2.0.0
+   */
+  def builder: Builder = new Builder
+
   private val HIVE_SHARED_STATE_CLASS_NAME = 
"org.apache.spark.sql.hive.HiveSharedState"
   private val HIVE_SESSION_STATE_CLASS_NAME = 
"org.apache.spark.sql.hive.HiveSessionState"
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to