git commit: [SPARK-2191][SQL] Make sure InsertIntoHiveTable doesn't execute more than once.

rxin Thu, 19 Jun 2014 14:14:30 -0700

Repository: spark
Updated Branches:
  refs/heads/master bce0897bc -> 777c5958c



[SPARK-2191][SQL] Make sure InsertIntoHiveTable doesn't execute more than once.

Author: Michael Armbrust <[email protected]>

Closes #1129 from marmbrus/doubleCreateAs and squashes the following commits:

9c6d9e4 [Michael Armbrust] Fix typo.
5128fe2 [Michael Armbrust] Make sure InsertIntoHiveTable doesn't execute each time you ask for its result.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/777c5958
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/777c5958
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/777c5958

Branch: refs/heads/master
Commit: 777c5958c4088182f9e2daba435ccb413a2f69d7
Parents: bce0897
Author: Michael Armbrust <[email protected]>
Authored: Thu Jun 19 14:14:03 2014 -0700
Committer: Reynold Xin <[email protected]>
Committed: Thu Jun 19 14:14:03 2014 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/execution/hiveOperators.scala    | 6 +++++-
 .../org/apache/spark/sql/hive/execution/HiveQuerySuite.scala   | 6 ++++++
 2 files changed, 11 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/777c5958/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
index a839231..240aa0d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
@@ -344,12 +344,16 @@ case class InsertIntoHiveTable(
     writer.commitJob()
   }
 
+  override def execute() = result
+
   /**
    * Inserts all the rows in the table into Hive.  Row objects are properly serialized with the
    * `org.apache.hadoop.hive.serde2.SerDe` and the
    * `org.apache.hadoop.mapred.OutputFormat` provided by the table definition.
+   *
+   * Note: this is run once and then kept to avoid double insertions.
    */
-  def execute() = {
+  private lazy val result: RDD[Row] = {
     val childRdd = child.execute()
     assert(childRdd != null)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/777c5958/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 8b2bdd5..5118f4b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -28,6 +28,12 @@ import org.apache.spark.sql.{SchemaRDD, execution, Row}
  */
 class HiveQuerySuite extends HiveComparisonTest {
 
+  test("CREATE TABLE AS runs once") {
+    hql("CREATE TABLE foo AS SELECT 1 FROM src LIMIT 1").collect()
+    assert(hql("SELECT COUNT(*) FROM foo").collect().head.getLong(0) === 1,
+      "Incorrect number of rows in created table")
+  }
+
   createQueryTest("between",
     "SELECT * FROM src WHERE key Between 1 and 2")

git commit: [SPARK-2191][SQL] Make sure InsertIntoHiveTable doesn't execute more than once.

Reply via email to