Repository: spark
Updated Branches:
refs/heads/master dcdda1978 -> 5b7d403c1
[SPARK-18094][SQL][TESTS] Move group analytics test cases from `SQLQuerySuite`
into a query file test.
## What changes were proposed in this pull request?
Currently we have several test cases for group analytics(ROLLUP/CUBE/GROUPING
SETS) in `SQLQuerySuite`, should better move them into a query file test.
The following test cases are moved to `group-analytics.sql`:
```
test("rollup")
test("grouping sets when aggregate functions containing groupBy columns")
test("cube")
test("grouping sets")
test("grouping and grouping_id")
test("grouping and grouping_id in having")
test("grouping and grouping_id in sort")
```
This is followup work of #15582
## How was this patch tested?
Modified query file `group-analytics.sql`, which will be tested by
`SQLQueryTestSuite`.
Author: jiangxingbo <[email protected]>
Closes #15624 from jiangxb1987/group-analytics-test.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b7d403c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b7d403c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b7d403c
Branch: refs/heads/master
Commit: 5b7d403c1819c32a6a5b87d470f8de1a8ad7a987
Parents: dcdda19
Author: jiangxingbo <[email protected]>
Authored: Wed Oct 26 23:51:16 2016 +0200
Committer: Reynold Xin <[email protected]>
Committed: Wed Oct 26 23:51:16 2016 +0200
----------------------------------------------------------------------
.../sql-tests/inputs/group-analytics.sql | 46 +++-
.../sql-tests/results/group-analytics.sql.out | 247 ++++++++++++++++++-
.../org/apache/spark/sql/SQLQuerySuite.scala | 189 --------------
3 files changed, 290 insertions(+), 192 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/5b7d403c/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql
b/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql
index 2f78349..f813538 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql
@@ -10,4 +10,48 @@ SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE;
-- ROLLUP on overlapping columns
SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP;
-SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP;
\ No newline at end of file
+SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP;
+
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000),
("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings);
+
+-- ROLLUP
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course,
year) ORDER BY course, year;
+
+-- CUBE
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course,
year) ORDER BY course, year;
+
+-- GROUPING SETS
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year
GROUPING SETS(course, year);
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year
GROUPING SETS(course);
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year
GROUPING SETS(year);
+
+-- GROUPING SETS with aggregate functions containing groupBy columns
+SELECT course, SUM(earnings) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings))
ORDER BY course, sum;
+SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM
courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings))
ORDER BY course, sum;
+
+-- GROUPING/GROUPING_ID
+SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course,
year) FROM courseSales
+GROUP BY CUBE(course, year);
+SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year;
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY
course, year;
+SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year);
+
+-- GROUPING/GROUPING_ID in having clause
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
+HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0;
+SELECT course, year FROM courseSales GROUP BY course, year HAVING
GROUPING(course) > 0;
+SELECT course, year FROM courseSales GROUP BY course, year HAVING
GROUPING_ID(course) > 0;
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING
grouping__id > 0;
+
+-- GROUPING/GROUPING_ID in orderBy clause
+SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP
BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, year;
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY
CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, year;
+SELECT course, year FROM courseSales GROUP BY course, year ORDER BY
GROUPING(course);
+SELECT course, year FROM courseSales GROUP BY course, year ORDER BY
GROUPING_ID(course);
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY
grouping__id;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/spark/blob/5b7d403c/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out
----------------------------------------------------------------------
diff --git
a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out
b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out
index 8ea7de8..825e8f5 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 5
+-- Number of queries: 26
-- !query 0
@@ -32,7 +32,6 @@ NULL 2 0
NULL NULL 3
-
-- !query 2
SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE
-- !query 2 schema
@@ -85,3 +84,247 @@ struct<a:int,b:int,sum(b):bigint>
3 2 2
3 NULL 3
NULL NULL 9
+
+
+-- !query 5
+CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
+("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000),
("dotNET", 2013, 48000), ("Java", 2013, 30000)
+AS courseSales(course, year, earnings)
+-- !query 5 schema
+struct<>
+-- !query 5 output
+
+
+
+-- !query 6
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course,
year) ORDER BY course, year
+-- !query 6 schema
+struct<course:string,year:int,sum(earnings):bigint>
+-- !query 6 output
+NULL NULL 113000
+Java NULL 50000
+Java 2012 20000
+Java 2013 30000
+dotNET NULL 63000
+dotNET 2012 15000
+dotNET 2013 48000
+
+
+-- !query 7
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course,
year) ORDER BY course, year
+-- !query 7 schema
+struct<course:string,year:int,sum(earnings):bigint>
+-- !query 7 output
+NULL NULL 113000
+NULL 2012 35000
+NULL 2013 78000
+Java NULL 50000
+Java 2012 20000
+Java 2013 30000
+dotNET NULL 63000
+dotNET 2012 15000
+dotNET 2013 48000
+
+
+-- !query 8
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year
GROUPING SETS(course, year)
+-- !query 8 schema
+struct<course:string,year:int,sum(earnings):bigint>
+-- !query 8 output
+Java NULL 50000
+NULL 2012 35000
+NULL 2013 78000
+dotNET NULL 63000
+
+
+-- !query 9
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year
GROUPING SETS(course)
+-- !query 9 schema
+struct<course:string,year:int,sum(earnings):bigint>
+-- !query 9 output
+Java NULL 50000
+dotNET NULL 63000
+
+
+-- !query 10
+SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year
GROUPING SETS(year)
+-- !query 10 schema
+struct<course:string,year:int,sum(earnings):bigint>
+-- !query 10 output
+NULL 2012 35000
+NULL 2013 78000
+
+
+-- !query 11
+SELECT course, SUM(earnings) AS sum FROM courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings))
ORDER BY course, sum
+-- !query 11 schema
+struct<course:string,sum:bigint>
+-- !query 11 output
+NULL 113000
+Java 20000
+Java 30000
+Java 50000
+dotNET 5000
+dotNET 10000
+dotNET 48000
+dotNET 63000
+
+
+-- !query 12
+SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM
courseSales
+GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings))
ORDER BY course, sum
+-- !query 12 schema
+struct<course:string,sum:bigint,grouping_id(course, earnings):int>
+-- !query 12 output
+NULL 113000 3
+Java 20000 0
+Java 30000 0
+Java 50000 1
+dotNET 5000 0
+dotNET 10000 0
+dotNET 48000 0
+dotNET 63000 1
+
+
+-- !query 13
+SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course,
year) FROM courseSales
+GROUP BY CUBE(course, year)
+-- !query 13 schema
+struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint,grouping_id(course,
year):int>
+-- !query 13 output
+Java 2012 0 0 0
+Java 2013 0 0 0
+Java NULL 0 1 1
+NULL 2012 1 0 2
+NULL 2013 1 0 2
+NULL NULL 1 1 3
+dotNET 2012 0 0 0
+dotNET 2013 0 0 0
+dotNET NULL 0 1 1
+
+
+-- !query 14
+SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year
+-- !query 14 schema
+struct<>
+-- !query 14 output
+org.apache.spark.sql.AnalysisException
+grouping() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 15
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY
course, year
+-- !query 15 schema
+struct<>
+-- !query 15 output
+org.apache.spark.sql.AnalysisException
+grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 16
+SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year)
+-- !query 16 schema
+struct<>
+-- !query 16 output
+org.apache.spark.sql.AnalysisException
+grouping__id is deprecated; use grouping_id() instead;
+
+
+-- !query 17
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year)
+HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0
+-- !query 17 schema
+struct<course:string,year:int>
+-- !query 17 output
+Java NULL
+NULL NULL
+dotNET NULL
+
+
+-- !query 18
+SELECT course, year FROM courseSales GROUP BY course, year HAVING
GROUPING(course) > 0
+-- !query 18 schema
+struct<>
+-- !query 18 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 19
+SELECT course, year FROM courseSales GROUP BY course, year HAVING
GROUPING_ID(course) > 0
+-- !query 19 schema
+struct<>
+-- !query 19 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 20
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING
grouping__id > 0
+-- !query 20 schema
+struct<>
+-- !query 20 output
+org.apache.spark.sql.AnalysisException
+grouping__id is deprecated; use grouping_id() instead;
+
+
+-- !query 21
+SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP
BY CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, year
+-- !query 21 schema
+struct<course:string,year:int,grouping(course):tinyint,grouping(year):tinyint>
+-- !query 21 output
+Java 2012 0 0
+Java 2013 0 0
+dotNET 2012 0 0
+dotNET 2013 0 0
+Java NULL 0 1
+dotNET NULL 0 1
+NULL 2012 1 0
+NULL 2013 1 0
+NULL NULL 1 1
+
+
+-- !query 22
+SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY
CUBE(course, year)
+ORDER BY GROUPING(course), GROUPING(year), course, year
+-- !query 22 schema
+struct<course:string,year:int,grouping_id(course, year):int>
+-- !query 22 output
+Java 2012 0
+Java 2013 0
+dotNET 2012 0
+dotNET 2013 0
+Java NULL 1
+dotNET NULL 1
+NULL 2012 2
+NULL 2013 2
+NULL NULL 3
+
+
+-- !query 23
+SELECT course, year FROM courseSales GROUP BY course, year ORDER BY
GROUPING(course)
+-- !query 23 schema
+struct<>
+-- !query 23 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 24
+SELECT course, year FROM courseSales GROUP BY course, year ORDER BY
GROUPING_ID(course)
+-- !query 24 schema
+struct<>
+-- !query 24 output
+org.apache.spark.sql.AnalysisException
+grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup;
+
+
+-- !query 25
+SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY
grouping__id
+-- !query 25 schema
+struct<>
+-- !query 25 output
+org.apache.spark.sql.AnalysisException
+grouping__id is deprecated; use grouping_id() instead;
http://git-wip-us.apache.org/repos/asf/spark/blob/5b7d403c/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index bd4c253..1a43d0b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2005,195 +2005,6 @@ class SQLQuerySuite extends QueryTest with
SharedSQLContext {
Row(false) :: Row(true) :: Nil)
}
- test("rollup") {
- checkAnswer(
- sql("select course, year, sum(earnings) from courseSales group by
rollup(course, year)" +
- " order by course, year"),
- Row(null, null, 113000.0) ::
- Row("Java", null, 50000.0) ::
- Row("Java", 2012, 20000.0) ::
- Row("Java", 2013, 30000.0) ::
- Row("dotNET", null, 63000.0) ::
- Row("dotNET", 2012, 15000.0) ::
- Row("dotNET", 2013, 48000.0) :: Nil
- )
- }
-
- test("grouping sets when aggregate functions containing groupBy columns") {
- checkAnswer(
- sql("select course, sum(earnings) as sum from courseSales group by
course, earnings " +
- "grouping sets((), (course), (course, earnings)) " +
- "order by course, sum"),
- Row(null, 113000.0) ::
- Row("Java", 20000.0) ::
- Row("Java", 30000.0) ::
- Row("Java", 50000.0) ::
- Row("dotNET", 5000.0) ::
- Row("dotNET", 10000.0) ::
- Row("dotNET", 48000.0) ::
- Row("dotNET", 63000.0) :: Nil
- )
-
- checkAnswer(
- sql("select course, sum(earnings) as sum, grouping_id(course, earnings)
from courseSales " +
- "group by course, earnings grouping sets((), (course), (course,
earnings)) " +
- "order by course, sum"),
- Row(null, 113000.0, 3) ::
- Row("Java", 20000.0, 0) ::
- Row("Java", 30000.0, 0) ::
- Row("Java", 50000.0, 1) ::
- Row("dotNET", 5000.0, 0) ::
- Row("dotNET", 10000.0, 0) ::
- Row("dotNET", 48000.0, 0) ::
- Row("dotNET", 63000.0, 1) :: Nil
- )
- }
-
- test("cube") {
- checkAnswer(
- sql("select course, year, sum(earnings) from courseSales group by
cube(course, year)"),
- Row("Java", 2012, 20000.0) ::
- Row("Java", 2013, 30000.0) ::
- Row("Java", null, 50000.0) ::
- Row("dotNET", 2012, 15000.0) ::
- Row("dotNET", 2013, 48000.0) ::
- Row("dotNET", null, 63000.0) ::
- Row(null, 2012, 35000.0) ::
- Row(null, 2013, 78000.0) ::
- Row(null, null, 113000.0) :: Nil
- )
- }
-
- test("grouping sets") {
- checkAnswer(
- sql("select course, year, sum(earnings) from courseSales group by
course, year " +
- "grouping sets(course, year)"),
- Row("Java", null, 50000.0) ::
- Row("dotNET", null, 63000.0) ::
- Row(null, 2012, 35000.0) ::
- Row(null, 2013, 78000.0) :: Nil
- )
-
- checkAnswer(
- sql("select course, year, sum(earnings) from courseSales group by
course, year " +
- "grouping sets(course)"),
- Row("Java", null, 50000.0) ::
- Row("dotNET", null, 63000.0) :: Nil
- )
-
- checkAnswer(
- sql("select course, year, sum(earnings) from courseSales group by
course, year " +
- "grouping sets(year)"),
- Row(null, 2012, 35000.0) ::
- Row(null, 2013, 78000.0) :: Nil
- )
- }
-
- test("grouping and grouping_id") {
- checkAnswer(
- sql("select course, year, grouping(course), grouping(year),
grouping_id(course, year)" +
- " from courseSales group by cube(course, year)"),
- Row("Java", 2012, 0, 0, 0) ::
- Row("Java", 2013, 0, 0, 0) ::
- Row("Java", null, 0, 1, 1) ::
- Row("dotNET", 2012, 0, 0, 0) ::
- Row("dotNET", 2013, 0, 0, 0) ::
- Row("dotNET", null, 0, 1, 1) ::
- Row(null, 2012, 1, 0, 2) ::
- Row(null, 2013, 1, 0, 2) ::
- Row(null, null, 1, 1, 3) :: Nil
- )
-
- var error = intercept[AnalysisException] {
- sql("select course, year, grouping(course) from courseSales group by
course, year")
- }
- assert(error.getMessage contains "grouping() can only be used with
GroupingSets/Cube/Rollup")
- error = intercept[AnalysisException] {
- sql("select course, year, grouping_id(course, year) from courseSales
group by course, year")
- }
- assert(error.getMessage contains "grouping_id() can only be used with
GroupingSets/Cube/Rollup")
- error = intercept[AnalysisException] {
- sql("select course, year, grouping__id from courseSales group by
cube(course, year)")
- }
- assert(error.getMessage contains "grouping__id is deprecated; use
grouping_id() instead")
- }
-
- test("grouping and grouping_id in having") {
- checkAnswer(
- sql("select course, year from courseSales group by cube(course, year)" +
- " having grouping(year) = 1 and grouping_id(course, year) > 0"),
- Row("Java", null) ::
- Row("dotNET", null) ::
- Row(null, null) :: Nil
- )
-
- var error = intercept[AnalysisException] {
- sql("select course, year from courseSales group by course, year" +
- " having grouping(course) > 0")
- }
- assert(error.getMessage contains
- "grouping()/grouping_id() can only be used with
GroupingSets/Cube/Rollup")
- error = intercept[AnalysisException] {
- sql("select course, year from courseSales group by course, year" +
- " having grouping_id(course, year) > 0")
- }
- assert(error.getMessage contains
- "grouping()/grouping_id() can only be used with
GroupingSets/Cube/Rollup")
- error = intercept[AnalysisException] {
- sql("select course, year from courseSales group by cube(course, year)" +
- " having grouping__id > 0")
- }
- assert(error.getMessage contains "grouping__id is deprecated; use
grouping_id() instead")
- }
-
- test("grouping and grouping_id in sort") {
- checkAnswer(
- sql("select course, year, grouping(course), grouping(year) from
courseSales" +
- " group by cube(course, year) order by grouping_id(course, year),
course, year"),
- Row("Java", 2012, 0, 0) ::
- Row("Java", 2013, 0, 0) ::
- Row("dotNET", 2012, 0, 0) ::
- Row("dotNET", 2013, 0, 0) ::
- Row("Java", null, 0, 1) ::
- Row("dotNET", null, 0, 1) ::
- Row(null, 2012, 1, 0) ::
- Row(null, 2013, 1, 0) ::
- Row(null, null, 1, 1) :: Nil
- )
-
- checkAnswer(
- sql("select course, year, grouping_id(course, year) from courseSales" +
- " group by cube(course, year) order by grouping(course),
grouping(year), course, year"),
- Row("Java", 2012, 0) ::
- Row("Java", 2013, 0) ::
- Row("dotNET", 2012, 0) ::
- Row("dotNET", 2013, 0) ::
- Row("Java", null, 1) ::
- Row("dotNET", null, 1) ::
- Row(null, 2012, 2) ::
- Row(null, 2013, 2) ::
- Row(null, null, 3) :: Nil
- )
-
- var error = intercept[AnalysisException] {
- sql("select course, year from courseSales group by course, year" +
- " order by grouping(course)")
- }
- assert(error.getMessage contains
- "grouping()/grouping_id() can only be used with
GroupingSets/Cube/Rollup")
- error = intercept[AnalysisException] {
- sql("select course, year from courseSales group by course, year" +
- " order by grouping_id(course, year)")
- }
- assert(error.getMessage contains
- "grouping()/grouping_id() can only be used with
GroupingSets/Cube/Rollup")
- error = intercept[AnalysisException] {
- sql("select course, year from courseSales group by cube(course, year)" +
- " order by grouping__id")
- }
- assert(error.getMessage contains "grouping__id is deprecated; use
grouping_id() instead")
- }
-
test("filter on a grouping column that is not presented in SELECT") {
checkAnswer(
sql("select count(1) from (select 1 as a) t group by a having a > 0"),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]