Repository: spark Updated Branches: refs/heads/branch-2.0 61738a38a -> f0279b05c
[SPARK-15824][SQL] Execute WITH .... INSERT ... statements immediately ## What changes were proposed in this pull request? We currently immediately execute `INSERT` commands when they are issued. This is not the case as soon as we use a `WITH` to define common table expressions, for example: ```sql WITH tbl AS (SELECT * FROM x WHERE id = 10) INSERT INTO y SELECT * FROM tbl ``` This PR fixes this problem. This PR closes https://github.com/apache/spark/pull/13561 (which fixes the a instance of this problem in the ThriftSever). ## How was this patch tested? Added a test to `InsertSuite` Author: Herman van Hovell <[email protected]> Closes #13678 from hvanhovell/SPARK-15824. (cherry picked from commit b75f454f946714b93fe561055cd53b0686187d2e) Signed-off-by: Wenchen Fan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f0279b05 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f0279b05 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f0279b05 Branch: refs/heads/branch-2.0 Commit: f0279b05cc893fb7671bdbcbf20aded1c427d6e0 Parents: 61738a3 Author: Herman van Hovell <[email protected]> Authored: Wed Jun 15 21:33:26 2016 -0700 Committer: Wenchen Fan <[email protected]> Committed: Wed Jun 15 21:34:09 2016 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/sql/Dataset.scala | 2 +- .../apache/spark/sql/sources/InsertSuite.scala | 25 ++++++++++++++++++++ .../spark/sql/hive/HiveMetastoreCatalog.scala | 2 +- 3 files changed, 27 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/f0279b05/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index fba4066..02cc398 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -179,7 +179,7 @@ class Dataset[T] private[sql]( case _ => false } - queryExecution.logical match { + queryExecution.analyzed match { // For various commands (like DDL) and queries with side effects, we force query execution // to happen right away to let these side effects take place eagerly. case p if hasSideEffects(p) => http://git-wip-us.apache.org/repos/asf/spark/blob/f0279b05/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 4780eb4..bade41b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.sources import java.io.File import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.util.Utils @@ -259,4 +260,28 @@ class InsertSuite extends DataSourceTest with SharedSQLContext { spark.catalog.dropTempView("oneToTen") } + + test("SPARK-15824 - Execute an INSERT wrapped in a WITH statement immediately") { + withTable("target", "target2") { + sql(s"CREATE TABLE target(a INT, b STRING) USING JSON") + sql("WITH tbl AS (SELECT * FROM jt) INSERT OVERWRITE TABLE target SELECT a, b FROM tbl") + checkAnswer( + sql("SELECT a, b FROM target"), + sql("SELECT a, b FROM jt") + ) + + sql(s"CREATE TABLE target2(a INT, b STRING) USING JSON") + val e = sql( + """ + |WITH tbl AS (SELECT * FROM jt) + |FROM tbl + |INSERT INTO target2 SELECT a, b WHERE a <= 5 + |INSERT INTO target2 SELECT a, b WHERE a > 5 + """.stripMargin) + checkAnswer( + sql("SELECT a, b FROM target2"), + sql("SELECT a, b FROM jt") + ) + } + } } http://git-wip-us.apache.org/repos/asf/spark/blob/f0279b05/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index d24cde2..224ff38 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -544,7 +544,7 @@ private[hive] case class InsertIntoHiveTable( child: LogicalPlan, overwrite: Boolean, ifNotExists: Boolean) - extends LogicalPlan { + extends LogicalPlan with Command { override def children: Seq[LogicalPlan] = child :: Nil override def output: Seq[Attribute] = Seq.empty --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
