This is an automated email from the ASF dual-hosted git repository.
tgraves pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 4be2a79 [SPARK-30049][SQL] SQL fails to parse when comment contains
an unmatched quote character
4be2a79 is described below
commit 4be2a79c7a9a1b1e3b0c3704e94da19c2b87ba47
Author: Javier <[email protected]>
AuthorDate: Tue Mar 3 09:55:15 2020 -0600
[SPARK-30049][SQL] SQL fails to parse when comment contains an unmatched
quote character
### What changes were proposed in this pull request?
A SQL statement that contains a comment with an unmatched quote character
can lead to a parse error:
- Added an insideComment flag in the splitter method to avoid checking
single and double quotes within a comment:
```
spark-sql> SELECT 1 -- someone's comment here
> ;
Error in query:
extraneous input ';' expecting <EOF>(line 2, pos 0)
== SQL ==
SELECT 1 -- someone's comment here
;
^^^
```
### Why are the changes needed?
This misbehaviour was not present in previous Spark versions.
### Does this PR introduce any user-facing change?
- No
### How was this patch tested?
- New tests were added.
Closes #27321 from javierivanov/SPARK-30049B.
Lead-authored-by: Javier <[email protected]>
Co-authored-by: Javier Fuentes <[email protected]>
Signed-off-by: Thomas Graves <[email protected]>
(cherry picked from commit 3ff213568694e265466d8480b61fd38f4fd8fdff)
Signed-off-by: Thomas Graves <[email protected]>
---
.../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 24 ++++++++++++++++++----
.../spark/sql/hive/thriftserver/CliSuite.scala | 22 ++++++++++++++++++++
2 files changed, 42 insertions(+), 4 deletions(-)
diff --git
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index b665d4a..19f7ea8 100644
---
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -509,24 +509,40 @@ private[hive] class SparkSQLCLIDriver extends CliDriver
with Logging {
private def splitSemiColon(line: String): JList[String] = {
var insideSingleQuote = false
var insideDoubleQuote = false
+ var insideComment = false
var escape = false
var beginIndex = 0
+ var endIndex = line.length
val ret = new JArrayList[String]
+
for (index <- 0 until line.length) {
- if (line.charAt(index) == '\'') {
+ if (line.charAt(index) == '\'' && !insideComment) {
// take a look to see if it is escaped
if (!escape) {
// flip the boolean variable
insideSingleQuote = !insideSingleQuote
}
- } else if (line.charAt(index) == '\"') {
+ } else if (line.charAt(index) == '\"' && !insideComment) {
// take a look to see if it is escaped
if (!escape) {
// flip the boolean variable
insideDoubleQuote = !insideDoubleQuote
}
+ } else if (line.charAt(index) == '-') {
+ val hasNext = index + 1 < line.length
+ if (insideDoubleQuote || insideSingleQuote || insideComment) {
+ // Ignores '-' in any case of quotes or comment.
+ // Avoids to start a comment(--) within a quoted segment or already
in a comment.
+ // Sample query: select "quoted value --"
+ // ^^ avoids starting a comment
if it's inside quotes.
+ } else if (hasNext && line.charAt(index + 1) == '-') {
+ // ignore quotes and ;
+ insideComment = true
+ // ignore eol
+ endIndex = index
+ }
} else if (line.charAt(index) == ';') {
- if (insideSingleQuote || insideDoubleQuote) {
+ if (insideSingleQuote || insideDoubleQuote || insideComment) {
// do not split
} else {
// split, do not include ; itself
@@ -543,7 +559,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver
with Logging {
escape = true
}
}
- ret.add(line.substring(beginIndex))
+ ret.add(line.substring(beginIndex, endIndex))
ret
}
}
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 6609701..43aafc3 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -400,4 +400,26 @@ class CliSuite extends SparkFunSuite with
BeforeAndAfterAll with Logging {
-> "1.000000000000000000"
)
}
+
+ test("SPARK-30049 Should not complain for quotes in commented lines") {
+ runCliWithin(1.minute)(
+ """SELECT concat('test', 'comment') -- someone's comment here
+ |;""".stripMargin -> "testcomment"
+ )
+ }
+
+ test("SPARK-30049 Should not complain for quotes in commented with
multi-lines") {
+ runCliWithin(1.minute)(
+ """SELECT concat('test', 'comment') -- someone's comment here \\
+ | comment continues here with single ' quote \\
+ | extra ' \\
+ |;""".stripMargin -> "testcomment"
+ )
+ runCliWithin(1.minute)(
+ """SELECT concat('test', 'comment') -- someone's comment here \\
+ | comment continues here with single ' quote \\
+ | extra ' \\
+ | ;""".stripMargin -> "testcomment"
+ )
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]