This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 89f31df [SPARK-30822][SQL] Remove semicolon at the end of a sql query
89f31df is described below
commit 89f31df3ffb20af8bd49c4bdb1f4624aa4feadf9
Author: samsetegne <[email protected]>
AuthorDate: Wed Mar 25 15:00:15 2020 +0800
[SPARK-30822][SQL] Remove semicolon at the end of a sql query
# What changes were proposed in this pull request?
This change proposes ignoring a terminating semicolon from queries
submitted by the user (if included) instead of raising a parse exception.
# Why are the changes needed?
When a user submits a directly executable SQL statement terminated with a
semicolon, they receive an
`org.apache.spark.sql.catalyst.parser.ParseException` of `extraneous input ';'
expecting <EOF>`. SQL-92 describes a direct SQL statement as having the format
of `<directly executable statement> <semicolon>` and the majority of SQL
implementations either require the semicolon as a statement terminator, or make
it optional (meaning not raising an exception when it's included, seemingly
ignoring it) [...]
# Does this PR introduce any user-facing change?
No
# How was this patch tested?
Unit test added to `PlanParserSuite`
```
sbt> project catalyst
sbt> testOnly *PlanParserSuite
[info] - case insensitive (565 milliseconds)
[info] - explain (9 milliseconds)
[info] - set operations (41 milliseconds)
[info] - common table expressions (31 milliseconds)
[info] - simple select query (47 milliseconds)
[info] - hive-style single-FROM statement (11 milliseconds)
[info] - multi select query (32 milliseconds)
[info] - query organization (41 milliseconds)
[info] - insert into (12 milliseconds)
[info] - aggregation (24 milliseconds)
[info] - limit (11 milliseconds)
[info] - window spec (11 milliseconds)
[info] - lateral view (17 milliseconds)
[info] - joins (62 milliseconds)
[info] - sampled relations (11 milliseconds)
[info] - sub-query (11 milliseconds)
[info] - scalar sub-query (9 milliseconds)
[info] - table reference (2 milliseconds)
[info] - table valued function (8 milliseconds)
[info] - SPARK-20311 range(N) as alias (2 milliseconds)
[info] - SPARK-20841 Support table column aliases in FROM clause (3
milliseconds)
[info] - SPARK-20962 Support subquery column aliases in FROM clause (4
milliseconds)
[info] - SPARK-20963 Support aliases for join relations in FROM clause (3
milliseconds)
[info] - inline table (23 milliseconds)
[info] - simple select query with !> and !< (5 milliseconds)
[info] - select hint syntax (34 milliseconds)
[info] - SPARK-20854: select hint syntax with expressions (12 milliseconds)
[info] - SPARK-20854: multiple hints (4 milliseconds)
[info] - TRIM function (16 milliseconds)
[info] - OVERLAY function (16 milliseconds)
[info] - precedence of set operations (18 milliseconds)
[info] - create/alter view as insert into table (4 milliseconds)
[info] - Invalid insert constructs in the query (10 milliseconds)
[info] - relation in v2 catalog (3 milliseconds)
[info] - CTE with column alias (2 milliseconds)
[info] - statement containing terminal semicolons (3 milliseconds)
[info] ScalaTest
[info] Run completed in 3 seconds, 129 milliseconds.
[info] Total number of tests run: 36
[info] Suites: completed 1, aborted 0
[info] Tests: succeeded 36, failed 0, canceled 0, ignored 0, pending 0
[info] All tests passed.
[info] Passed: Total 36, Failed 0, Errors 0, Passed 36
```
### Current behavior:
#### scala
```scala
scala> val df = sql("select 1")
// df: org.apache.spark.sql.DataFrame = [1: int]
scala> df.show()
// +---+
// | 1|
// +---+
// | 1|
// +---+
scala> val df = sql("select 1;")
// org.apache.spark.sql.catalyst.parser.ParseException:
// extraneous input ';' expecting <EOF>(line 1, pos 8)
// == SQL ==
// select 1;
// --------^^^
// at
org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:263)
// at
org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:130)
// at
org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:52)
// at
org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:76)
// at
org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:605)
// at
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
// at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:605)
// ... 47 elided
```
#### pyspark
```python
df = spark.sql('select 1')
df.show()
#+---+
#| 1|
#+---+
#| 1|
#+---+
df = spark.sql('select 1;')
# Traceback (most recent call last):
# File "<stdin>", line 1, in <module>
# File "/Users/ssetegne/spark/python/pyspark/sql/session.py", line 646,
in sql
# return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
# File
"/Users/ssetegne/spark/python/lib/py4j-0.10.8.1-src.zip/py4j/java_gateway.py",
line 1286, in # __call__
# File "/Users/ssetegne/spark/python/pyspark/sql/utils.py", line 102, in
deco
# raise converted
# pyspark.sql.utils.ParseException:
# extraneous input ';' expecting <EOF>(line 1, pos 8)
# == SQL ==
# select 1;
# --------^^^
```
### Behavior after proposed fix:
#### scala
```scala
scala> val df = sql("select 1")
// df: org.apache.spark.sql.DataFrame = [1: int]
scala> df.show()
// +---+
// | 1|
// +---+
// | 1|
// +---+
scala> val df = sql("select 1;")
// df: org.apache.spark.sql.DataFrame = [1: int]
scala> df.show()
// +---+
// | 1|
// +---+
// | 1|
// +---+
```
#### pyspark
```python
df = spark.sql('select 1')
df.show()
#+---+
#| 1 |
#+---+
#| 1 |
#+---+
df = spark.sql('select 1;')
df.show()
#+---+
#| 1 |
#+---+
#| 1 |
#+---+
```
Closes #27567 from samsetegne/semicolon.
Authored-by: samsetegne <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit 44431d4b1a22c3db87d7e4a24df517d6d45905a8)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +-
.../org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala | 2 +-
.../org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala | 7 +++++++
.../src/test/resources/sql-tests/results/describe-query.sql.out | 4 ++--
sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out | 4 ++--
sql/core/src/test/resources/sql-tests/results/interval.sql.out | 2 +-
6 files changed, 14 insertions(+), 7 deletions(-)
diff --git
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 563ef69..143a567 100644
---
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -64,7 +64,7 @@ grammar SqlBase;
}
singleStatement
- : statement EOF
+ : statement ';'* EOF
;
singleExpression
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
index 35a54c8..543ea5d 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -1480,7 +1480,7 @@ class DDLParserSuite extends AnalysisTest {
AnalyzeColumnStatement(Seq("a", "b", "c"), None, allColumns = true))
intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL COLUMNS key,
value",
- "mismatched input 'key' expecting <EOF>")
+ "mismatched input 'key' expecting {<EOF>, ';'}")
intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL",
"missing 'COLUMNS' at '<EOF>'")
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 875096f..11230e4 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -922,4 +922,11 @@ class PlanParserSuite extends AnalysisTest {
"WITH t(x) AS (SELECT c FROM a) SELECT * FROM t",
cte(table("t").select(star()), "t" -> ((table("a").select('c),
Seq("x")))))
}
+
+ test("statement containing terminal semicolons") {
+ assertEqual("select 1;", OneRowRelation().select(1))
+ assertEqual("select a, b;", OneRowRelation().select('a, 'b))
+ assertEqual("select a, b from db.c;;;", table("db", "c").select('a, 'b))
+ assertEqual("select a, b from db.c; ;; ;", table("db", "c").select('a,
'b))
+ }
}
diff --git
a/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out
b/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out
index 6b16aba..2199fc0 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out
@@ -112,7 +112,7 @@ struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
-mismatched input 'desc_temp1' expecting {<EOF>, '.'}(line 1, pos 21)
+mismatched input 'desc_temp1' expecting {<EOF>, ';'}(line 1, pos 21)
== SQL ==
DESCRIBE INSERT INTO desc_temp1 values (1, 'val1')
@@ -126,7 +126,7 @@ struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
-mismatched input 'desc_temp1' expecting {<EOF>, '.'}(line 1, pos 21)
+mismatched input 'desc_temp1' expecting {<EOF>, ';'}(line 1, pos 21)
== SQL ==
DESCRIBE INSERT INTO desc_temp1 SELECT * FROM desc_temp2
diff --git a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out
b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out
index 8eeabb3..0a5fe7a 100644
--- a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out
@@ -138,7 +138,7 @@ struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
-extraneous input 'ROLLUP' expecting <EOF>(line 1, pos 53)
+extraneous input 'ROLLUP' expecting {<EOF>, ';'}(line 1, pos 53)
== SQL ==
SELECT a, b, c, count(d) FROM grouping GROUP BY WITH ROLLUP
@@ -152,7 +152,7 @@ struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
-extraneous input 'CUBE' expecting <EOF>(line 1, pos 53)
+extraneous input 'CUBE' expecting {<EOF>, ';'}(line 1, pos 53)
== SQL ==
SELECT a, b, c, count(d) FROM grouping GROUP BY WITH CUBE
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 7a3dd74..0d758ca 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -639,7 +639,7 @@ struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
-extraneous input 'day' expecting <EOF>(line 1, pos 27)
+extraneous input 'day' expecting {<EOF>, ';'}(line 1, pos 27)
== SQL ==
select interval 30 day day day
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]