Repository: spark
Updated Branches:
  refs/heads/branch-2.0 61738a38a -> f0279b05c


[SPARK-15824][SQL] Execute WITH .... INSERT ... statements immediately

## What changes were proposed in this pull request?
We currently immediately execute `INSERT` commands when they are issued. This 
is not the case as soon as we use a `WITH` to define common table expressions, 
for example:
```sql
WITH
tbl AS (SELECT * FROM x WHERE id = 10)
INSERT INTO y
SELECT *
FROM   tbl
```

This PR fixes this problem. This PR closes 
https://github.com/apache/spark/pull/13561 (which fixes an instance of this 
problem in the ThriftServer).

## How was this patch tested?
Added a test to `InsertSuite`

Author: Herman van Hovell <[email protected]>

Closes #13678 from hvanhovell/SPARK-15824.

(cherry picked from commit b75f454f946714b93fe561055cd53b0686187d2e)
Signed-off-by: Wenchen Fan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f0279b05
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f0279b05
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f0279b05

Branch: refs/heads/branch-2.0
Commit: f0279b05cc893fb7671bdbcbf20aded1c427d6e0
Parents: 61738a3
Author: Herman van Hovell <[email protected]>
Authored: Wed Jun 15 21:33:26 2016 -0700
Committer: Wenchen Fan <[email protected]>
Committed: Wed Jun 15 21:34:09 2016 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/Dataset.scala    |  2 +-
 .../apache/spark/sql/sources/InsertSuite.scala  | 25 ++++++++++++++++++++
 .../spark/sql/hive/HiveMetastoreCatalog.scala   |  2 +-
 3 files changed, 27 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f0279b05/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index fba4066..02cc398 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -179,7 +179,7 @@ class Dataset[T] private[sql](
       case _ => false
     }
 
-    queryExecution.logical match {
+    queryExecution.analyzed match {
       // For various commands (like DDL) and queries with side effects, we 
force query execution
       // to happen right away to let these side effects take place eagerly.
       case p if hasSideEffects(p) =>

http://git-wip-us.apache.org/repos/asf/spark/blob/f0279b05/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 4780eb4..bade41b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.sources
 import java.io.File
 
 import org.apache.spark.sql.{AnalysisException, Row}
+import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.util.Utils
 
@@ -259,4 +260,28 @@ class InsertSuite extends DataSourceTest with 
SharedSQLContext {
 
     spark.catalog.dropTempView("oneToTen")
   }
+
+  test("SPARK-15824 - Execute an INSERT wrapped in a WITH statement 
immediately") {
+    withTable("target", "target2") {
+      sql(s"CREATE TABLE target(a INT, b STRING) USING JSON")
+      sql("WITH tbl AS (SELECT * FROM jt) INSERT OVERWRITE TABLE target SELECT 
a, b FROM tbl")
+      checkAnswer(
+        sql("SELECT a, b FROM target"),
+        sql("SELECT a, b FROM jt")
+      )
+
+      sql(s"CREATE TABLE target2(a INT, b STRING) USING JSON")
+      val e = sql(
+        """
+          |WITH tbl AS (SELECT * FROM jt)
+          |FROM tbl
+          |INSERT INTO target2 SELECT a, b WHERE a <= 5
+          |INSERT INTO target2 SELECT a, b WHERE a > 5
+        """.stripMargin)
+      checkAnswer(
+        sql("SELECT a, b FROM target2"),
+        sql("SELECT a, b FROM jt")
+      )
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/f0279b05/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index d24cde2..224ff38 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -544,7 +544,7 @@ private[hive] case class InsertIntoHiveTable(
     child: LogicalPlan,
     overwrite: Boolean,
     ifNotExists: Boolean)
-  extends LogicalPlan {
+  extends LogicalPlan with Command {
 
   override def children: Seq[LogicalPlan] = child :: Nil
   override def output: Seq[Attribute] = Seq.empty


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to