Repository: spark
Updated Branches:
  refs/heads/branch-2.0 003c44792 -> 141e910af


[SPARK-15789][SQL] Allow reserved keywords in most places

## What changes were proposed in this pull request?
The parser currently does not allow the use of some SQL keywords as table or 
field names. This PR adds supports for all keywords as identifier. The 
exception to this are table aliases, in this case most keywords are allowed 
except for join keywords (```anti, full, inner, left, semi, right, natural, on, 
join, cross```) and set-operator keywords (```union, intersect, except```).

## How was this patch tested?
I have added/move/renamed test in the catalyst `*ParserSuite`s.

Author: Herman van Hovell <[email protected]>

Closes #13534 from hvanhovell/SPARK-15789.

(cherry picked from commit 91fbc880b69bddcf5310afecc49df1102408e1f3)
Signed-off-by: Wenchen Fan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/141e910a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/141e910a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/141e910a

Branch: refs/heads/branch-2.0
Commit: 141e910af97a5e84082e25f1d5b3c3a9e6c231fc
Parents: 003c447
Author: Herman van Hovell <[email protected]>
Authored: Tue Jun 7 17:01:11 2016 -0700
Committer: Wenchen Fan <[email protected]>
Committed: Tue Jun 7 17:01:26 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 30 ++++++++++----------
 .../spark/sql/catalyst/parser/AstBuilder.scala  | 12 +++++---
 .../catalyst/parser/DataTypeParserSuite.scala   |  6 +++-
 .../sql/catalyst/parser/ErrorParserSuite.scala  |  2 --
 .../sql/catalyst/parser/PlanParserSuite.scala   |  1 +
 .../parser/TableIdentifierParserSuite.scala     | 12 ++++----
 6 files changed, 35 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/141e910a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 2dd3cfa..d102559 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -109,9 +109,9 @@ statement
     | SHOW FUNCTIONS (LIKE? (qualifiedName | pattern=STRING))?         
#showFunctions
     | SHOW CREATE TABLE tableIdentifier                                
#showCreateTable
     | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName            
#describeFunction
+    | (DESC | DESCRIBE) DATABASE EXTENDED? identifier                  
#describeDatabase
     | (DESC | DESCRIBE) option=(EXTENDED | FORMATTED)?
         tableIdentifier partitionSpec? describeColName?                
#describeTable
-    | (DESC | DESCRIBE) DATABASE EXTENDED? identifier                  
#describeDatabase
     | REFRESH TABLE tableIdentifier                                    
#refreshTable
     | CACHE LAZY? TABLE identifier (AS? query)?                        
#cacheTable
     | UNCACHE TABLE identifier                                         
#uncacheTable
@@ -251,7 +251,7 @@ tableProperty
     ;
 
 tablePropertyKey
-    : looseIdentifier ('.' looseIdentifier)*
+    : identifier ('.' identifier)*
     | STRING
     ;
 
@@ -419,9 +419,9 @@ identifierComment
     ;
 
 relationPrimary
-    : tableIdentifier sample? (AS? identifier)?                     #tableName
-    | '(' queryNoWith ')' sample? (AS? identifier)?                 
#aliasedQuery
-    | '(' relation ')' sample? (AS? identifier)?                    
#aliasedRelation
+    : tableIdentifier sample? (AS? strictIdentifier)?               #tableName
+    | '(' queryNoWith ')' sample? (AS? strictIdentifier)?           
#aliasedQuery
+    | '(' relation ')' sample? (AS? strictIdentifier)?              
#aliasedRelation
     | inlineTable                                                   
#inlineTableDefault2
     ;
 
@@ -456,8 +456,8 @@ expression
     ;
 
 booleanExpression
-    : predicated                                                   
#booleanDefault
-    | NOT booleanExpression                                        #logicalNot
+    : NOT booleanExpression                                        #logicalNot
+    | predicated                                                   
#booleanDefault
     | left=booleanExpression operator=AND right=booleanExpression  
#logicalBinary
     | left=booleanExpression operator=OR right=booleanExpression   
#logicalBinary
     | EXISTS '(' query ')'                                         #exists
@@ -597,16 +597,13 @@ qualifiedName
     : identifier ('.' identifier)*
     ;
 
-// Identifier that also allows the use of a number of SQL keywords (mainly for 
backwards compatibility).
-looseIdentifier
-    : identifier
-    | FROM
-    | TO
-    | TABLE
-    | WITH
+identifier
+    : strictIdentifier
+    | ANTI | FULL | INNER | LEFT | SEMI | RIGHT | NATURAL | JOIN | CROSS | ON
+    | UNION | INTERSECT | EXCEPT
     ;
 
-identifier
+strictIdentifier
     : IDENTIFIER             #unquotedIdentifier
     | quotedIdentifier       #quotedIdentifierAlternative
     | nonReserved            #unquotedIdentifier
@@ -652,6 +649,9 @@ nonReserved
     | AT | NULLS | OVERWRITE | ALL | ALTER | AS | BETWEEN | BY | CREATE | 
DELETE
     | DESCRIBE | DROP | EXISTS | FALSE | FOR | GROUP | IN | INSERT | INTO | IS 
|LIKE
     | NULL | ORDER | OUTER | TABLE | TRUE | WITH | RLIKE
+    | AND | CASE | CAST | DISTINCT | DIV | ELSE | END | FUNCTION | INTERVAL | 
MACRO | OR | STRATIFY | THEN
+    | UNBOUNDED | WHEN
+    | DATABASE | SELECT | FROM | WHERE | HAVING | TO | TABLE | WITH | NOT
     ;
 
 SELECT: 'SELECT';

http://git-wip-us.apache.org/repos/asf/spark/blob/141e910a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 3473fee..e380643 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -642,7 +642,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
   override def visitTableName(ctx: TableNameContext): LogicalPlan = 
withOrigin(ctx) {
     val table = UnresolvedRelation(
       visitTableIdentifier(ctx.tableIdentifier),
-      Option(ctx.identifier).map(_.getText))
+      Option(ctx.strictIdentifier).map(_.getText))
     table.optionalMap(ctx.sample)(withSample)
   }
 
@@ -692,7 +692,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
    * hooks.
    */
   override def visitAliasedRelation(ctx: AliasedRelationContext): LogicalPlan 
= withOrigin(ctx) {
-    
plan(ctx.relation).optionalMap(ctx.sample)(withSample).optionalMap(ctx.identifier)(aliasPlan)
+    plan(ctx.relation)
+      .optionalMap(ctx.sample)(withSample)
+      .optionalMap(ctx.strictIdentifier)(aliasPlan)
   }
 
   /**
@@ -701,13 +703,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
    * hooks.
    */
   override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = 
withOrigin(ctx) {
-    
plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample).optionalMap(ctx.identifier)(aliasPlan)
+    plan(ctx.queryNoWith)
+      .optionalMap(ctx.sample)(withSample)
+      .optionalMap(ctx.strictIdentifier)(aliasPlan)
   }
 
   /**
    * Create an alias (SubqueryAlias) for a LogicalPlan.
    */
-  private def aliasPlan(alias: IdentifierContext, plan: LogicalPlan): 
LogicalPlan = {
+  private def aliasPlan(alias: ParserRuleContext, plan: LogicalPlan): 
LogicalPlan = {
     SubqueryAlias(alias.getText, plan)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/141e910a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
index 4078297..020fb16 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.types._
 
-class CatalystQlDataTypeParserSuite extends SparkFunSuite {
+class DataTypeParserSuite extends SparkFunSuite {
 
   def parse(sql: String): DataType = CatalystSqlParser.parseDataType(sql)
 
@@ -133,4 +133,8 @@ class CatalystQlDataTypeParserSuite extends SparkFunSuite {
   checkDataType(
     "struct<`x``y` int>",
     (new StructType).add("x`y", IntegerType))
+
+  // Use SQL keywords.
+  checkDataType("struct<end: long, select: int, from: string>",
+    (new StructType).add("end", LongType).add("select", 
IntegerType).add("from", StringType))
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/141e910a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala
index 6da3eae..f67697e 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala
@@ -39,8 +39,6 @@ class ErrorParserSuite extends SparkFunSuite {
   }
 
   test("no viable input") {
-    intercept("select from tbl", 1, 7, "no viable alternative at input", 
"-------^^^")
-    intercept("select\nfrom tbl", 2, 0, "no viable alternative at input", 
"^^^")
     intercept("select ((r + 1) ", 1, 16, "no viable alternative at input", 
"----------------^^^")
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/141e910a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index a6fad2d..77023cf 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -107,6 +107,7 @@ class PlanParserSuite extends PlanTest {
       table("db", "c").select('a, 'b).where('x < 1))
     assertEqual("select distinct a, b from db.c", Distinct(table("db", 
"c").select('a, 'b)))
     assertEqual("select all a, b from db.c", table("db", "c").select('a, 'b))
+    assertEqual("select from tbl", OneRowRelation.select('from.as("tbl")))
   }
 
   test("reverse select query") {

http://git-wip-us.apache.org/repos/asf/spark/blob/141e910a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index bef7d38..8bbf87e 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -53,8 +53,9 @@ class TableIdentifierParserSuite extends SparkFunSuite {
     "bigint", "binary", "boolean", "current_date", "current_timestamp", 
"date", "double", "float",
     "int", "smallint", "timestamp", "at")
 
-  val hiveNonReservedRegression = Seq("left", "right", "left", "right", 
"full", "inner", "semi",
-    "union", "except", "intersect", "schema", "database")
+  val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", 
"semi", "right",
+    "natural", "union", "intersect", "except", "database", "on", "join", 
"cross", "select", "from",
+    "where", "having", "from", "to", "table", "with", "not")
 
   test("table identifier") {
     // Regular names.
@@ -67,11 +68,10 @@ class TableIdentifierParserSuite extends SparkFunSuite {
     }
   }
 
-  test("table identifier - keywords") {
+  test("table identifier - strict keywords") {
     // SQL Keywords.
-    val keywords = Seq("select", "from", "where") ++ hiveNonReservedRegression
-    keywords.foreach { keyword =>
-      intercept[ParseException](parseTableIdentifier(keyword))
+    hiveStrictNonReservedKeyword.foreach { keyword =>
+      assert(TableIdentifier(keyword) === parseTableIdentifier(keyword))
       assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
       assert(TableIdentifier(keyword, Option("db")) === 
parseTableIdentifier(s"db.`$keyword`"))
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to