Repository: spark
Updated Branches:
  refs/heads/master 6eda55f72 -> 9f5c77ae3


[SPARK-21983][SQL] Fix Antlr 4.7 deprecation warnings

## What changes were proposed in this pull request?

Fix three deprecation warnings introduced by move to ANTLR 4.7:

* Use ParserRuleContext.addChild(TerminalNode) in preference to
  deprecated ParserRuleContext.addChild(Token) interface.
* TokenStream.reset() is deprecated in favour of seek(0)
* Replace use of deprecated ANTLRInputStream with stream returned by
  CharStreams.fromString()

The last item changed the way we construct ANTLR's input stream (from
direct instantiation to factory construction), so necessitated a change
to how we override the LA() method to always return an upper-case
char. The ANTLR object is now wrapped, rather than inherited-from.

* Also fix incorrect usage of CharStream.getText() which expects the rhs
  of the supplied interval to be the last char to be returned, i.e. the
  interval is inclusive, and work around bug in ANTLR 4.7 where empty
  streams or intervals may cause getText() to throw an error.

## How was this patch tested?

Ran all the sql tests. Confirmed that LA() override has coverage by
breaking it, and noting that tests failed.

Author: Henry Robinson <[email protected]>

Closes #19578 from henryr/spark-21983.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9f5c77ae
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9f5c77ae
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9f5c77ae

Branch: refs/heads/master
Commit: 9f5c77ae32890b892b69b45a2833b9d6d6866aea
Parents: 6eda55f
Author: Henry Robinson <[email protected]>
Authored: Mon Oct 30 07:45:54 2017 +0000
Committer: Sean Owen <[email protected]>
Committed: Mon Oct 30 07:45:54 2017 +0000

----------------------------------------------------------------------
 .../spark/sql/catalyst/parser/ParseDriver.scala | 39 ++++++++++++++++----
 .../spark/sql/catalyst/parser/ParserUtils.scala |  4 +-
 .../sql/catalyst/parser/ParserUtilsSuite.scala  |  4 +-
 3 files changed, 35 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9f5c77ae/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index 0d9ad21..4c20f23 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -18,7 +18,8 @@ package org.apache.spark.sql.catalyst.parser
 
 import org.antlr.v4.runtime._
 import org.antlr.v4.runtime.atn.PredictionMode
-import org.antlr.v4.runtime.misc.ParseCancellationException
+import org.antlr.v4.runtime.misc.{Interval, ParseCancellationException}
+import org.antlr.v4.runtime.tree.TerminalNodeImpl
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.AnalysisException
@@ -80,7 +81,7 @@ abstract class AbstractSqlParser extends ParserInterface with 
Logging {
   protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
     logDebug(s"Parsing command: $command")
 
-    val lexer = new SqlBaseLexer(new ANTLRNoCaseStringStream(command))
+    val lexer = new SqlBaseLexer(new 
UpperCaseCharStream(CharStreams.fromString(command)))
     lexer.removeErrorListeners()
     lexer.addErrorListener(ParseErrorListener)
 
@@ -99,7 +100,7 @@ abstract class AbstractSqlParser extends ParserInterface 
with Logging {
       catch {
         case e: ParseCancellationException =>
           // if we fail, parse with LL mode
-          tokenStream.reset() // rewind input stream
+          tokenStream.seek(0) // rewind input stream
           parser.reset()
 
           // Try Again.
@@ -148,12 +149,33 @@ object CatalystSqlParser extends AbstractSqlParser {
  * the consume() function of the super class ANTLRStringStream. The LA() 
function is the lookahead
  * function and is purely used for matching lexical rules. This also means 
that the grammar will
  * only accept capitalized tokens in case it is run from other tools like 
antlrworks which do not
- * have the ANTLRNoCaseStringStream implementation.
+ * have the UpperCaseCharStream implementation.
  */
 
-private[parser] class ANTLRNoCaseStringStream(input: String) extends 
ANTLRInputStream(input) {
+private[parser] class UpperCaseCharStream(wrapped: CodePointCharStream) 
extends CharStream {
+  override def consume(): Unit = wrapped.consume
+  override def getSourceName(): String = wrapped.getSourceName
+  override def index(): Int = wrapped.index
+  override def mark(): Int = wrapped.mark
+  override def release(marker: Int): Unit = wrapped.release(marker)
+  override def seek(where: Int): Unit = wrapped.seek(where)
+  override def size(): Int = wrapped.size
+
+  override def getText(interval: Interval): String = {
+    // ANTLR 4.7's CodePointCharStream implementations have bugs when
+    // getText() is called with an empty stream, or intervals where
+    // the start > end. See
+    // https://github.com/antlr/antlr4/commit/ac9f7530 for one fix
+    // that is not yet in a released ANTLR artifact.
+    if (size() > 0 && (interval.b - interval.a >= 0)) {
+      wrapped.getText(interval)
+    } else {
+      ""
+    }
+  }
+
   override def LA(i: Int): Int = {
-    val la = super.LA(i)
+    val la = wrapped.LA(i)
     if (la == 0 || la == IntStream.EOF) la
     else Character.toUpperCase(la)
   }
@@ -244,11 +266,12 @@ case object PostProcessor extends SqlBaseBaseListener {
     val parent = ctx.getParent
     parent.removeLastChild()
     val token = ctx.getChild(0).getPayload.asInstanceOf[Token]
-    parent.addChild(f(new CommonToken(
+    val newToken = new CommonToken(
       new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, 
token.getInputStream),
       SqlBaseParser.IDENTIFIER,
       token.getChannel,
       token.getStartIndex + stripMargins,
-      token.getStopIndex - stripMargins)))
+      token.getStopIndex - stripMargins)
+    parent.addChild(new TerminalNodeImpl(f(newToken)))
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/9f5c77ae/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
index 9c1031e..9b127f9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
@@ -32,7 +32,7 @@ object ParserUtils {
   /** Get the command which created the token. */
   def command(ctx: ParserRuleContext): String = {
     val stream = ctx.getStart.getInputStream
-    stream.getText(Interval.of(0, stream.size()))
+    stream.getText(Interval.of(0, stream.size() - 1))
   }
 
   def operationNotAllowed(message: String, ctx: ParserRuleContext): Nothing = {
@@ -58,7 +58,7 @@ object ParserUtils {
   /** Get all the text which comes after the given token. */
   def remainder(token: Token): String = {
     val stream = token.getInputStream
-    val interval = Interval.of(token.getStopIndex + 1, stream.size())
+    val interval = Interval.of(token.getStopIndex + 1, stream.size() - 1)
     stream.getText(interval)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/9f5c77ae/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala
index d5748a4..768030f 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala
@@ -16,7 +16,7 @@
  */
 package org.apache.spark.sql.catalyst.parser
 
-import org.antlr.v4.runtime.{CommonTokenStream, ParserRuleContext}
+import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext}
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
@@ -57,7 +57,7 @@ class ParserUtilsSuite extends SparkFunSuite {
   }
 
   private def buildContext[T](command: String)(toResult: SqlBaseParser => T): 
T = {
-    val lexer = new SqlBaseLexer(new ANTLRNoCaseStringStream(command))
+    val lexer = new SqlBaseLexer(new 
UpperCaseCharStream(CharStreams.fromString(command)))
     val tokenStream = new CommonTokenStream(lexer)
     val parser = new SqlBaseParser(tokenStream)
     toResult(parser)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to