Repository: spark Updated Branches: refs/heads/master bce0897bc -> 777c5958c
[SPARK-2191][SQL] Make sure InsertIntoHiveTable doesn't execute more than once. Author: Michael Armbrust <[email protected]> Closes #1129 from marmbrus/doubleCreateAs and squashes the following commits: 9c6d9e4 [Michael Armbrust] Fix typo. 5128fe2 [Michael Armbrust] Make sure InsertIntoHiveTable doesn't execute each time you ask for its result. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/777c5958 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/777c5958 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/777c5958 Branch: refs/heads/master Commit: 777c5958c4088182f9e2daba435ccb413a2f69d7 Parents: bce0897 Author: Michael Armbrust <[email protected]> Authored: Thu Jun 19 14:14:03 2014 -0700 Committer: Reynold Xin <[email protected]> Committed: Thu Jun 19 14:14:03 2014 -0700 ---------------------------------------------------------------------- .../org/apache/spark/sql/hive/execution/hiveOperators.scala | 6 +++++- .../org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/777c5958/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala index a839231..240aa0d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala @@ -344,12 +344,16 @@ case class InsertIntoHiveTable( writer.commitJob() } + override def execute() = result + /** * Inserts all the rows in the table into Hive. Row objects are properly serialized with the * `org.apache.hadoop.hive.serde2.SerDe` and the * `org.apache.hadoop.mapred.OutputFormat` provided by the table definition. + * + * Note: this is run once and then kept to avoid double insertions. */ - def execute() = { + private lazy val result: RDD[Row] = { val childRdd = child.execute() assert(childRdd != null) http://git-wip-us.apache.org/repos/asf/spark/blob/777c5958/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 8b2bdd5..5118f4b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -28,6 +28,12 @@ import org.apache.spark.sql.{SchemaRDD, execution, Row} */ class HiveQuerySuite extends HiveComparisonTest { + test("CREATE TABLE AS runs once") { + hql("CREATE TABLE foo AS SELECT 1 FROM src LIMIT 1").collect() + assert(hql("SELECT COUNT(*) FROM foo").collect().head.getLong(0) === 1, + "Incorrect number of rows in created table") + } + createQueryTest("between", "SELECT * FROM src WHERE key Between 1 and 2")
