Repository: spark Updated Branches: refs/heads/master 6eda55f72 -> 9f5c77ae3
[SPARK-21983][SQL] Fix Antlr 4.7 deprecation warnings ## What changes were proposed in this pull request? Fix three deprecation warnings introduced by move to ANTLR 4.7: * Use ParserRuleContext.addChild(TerminalNode) in preference to deprecated ParserRuleContext.addChild(Token) interface. * TokenStream.reset() is deprecated in favour of seek(0) * Replace use of deprecated ANTLRInputStream with stream returned by CharStreams.fromString() The last item changed the way we construct ANTLR's input stream (from direct instantiation to factory construction), so necessitated a change to how we override the LA() method to always return an upper-case char. The ANTLR object is now wrapped, rather than inherited-from. * Also fix incorrect usage of CharStream.getText() which expects the rhs of the supplied interval to be the last char to be returned, i.e. the interval is inclusive, and work around bug in ANTLR 4.7 where empty streams or intervals may cause getText() to throw an error. ## How was this patch tested? Ran all the sql tests. Confirmed that LA() override has coverage by breaking it, and noting that tests failed. Author: Henry Robinson <[email protected]> Closes #19578 from henryr/spark-21983. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9f5c77ae Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9f5c77ae Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9f5c77ae Branch: refs/heads/master Commit: 9f5c77ae32890b892b69b45a2833b9d6d6866aea Parents: 6eda55f Author: Henry Robinson <[email protected]> Authored: Mon Oct 30 07:45:54 2017 +0000 Committer: Sean Owen <[email protected]> Committed: Mon Oct 30 07:45:54 2017 +0000 ---------------------------------------------------------------------- .../spark/sql/catalyst/parser/ParseDriver.scala | 39 ++++++++++++++++---- .../spark/sql/catalyst/parser/ParserUtils.scala | 4 +- .../sql/catalyst/parser/ParserUtilsSuite.scala | 4 +- 3 files changed, 35 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/9f5c77ae/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 0d9ad21..4c20f23 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.catalyst.parser import org.antlr.v4.runtime._ import org.antlr.v4.runtime.atn.PredictionMode -import org.antlr.v4.runtime.misc.ParseCancellationException +import org.antlr.v4.runtime.misc.{Interval, ParseCancellationException} +import org.antlr.v4.runtime.tree.TerminalNodeImpl import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException @@ -80,7 +81,7 @@ abstract class AbstractSqlParser extends ParserInterface with Logging { protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = { logDebug(s"Parsing command: $command") - val lexer = new SqlBaseLexer(new ANTLRNoCaseStringStream(command)) + val lexer = new SqlBaseLexer(new UpperCaseCharStream(CharStreams.fromString(command))) lexer.removeErrorListeners() lexer.addErrorListener(ParseErrorListener) @@ -99,7 +100,7 @@ abstract class AbstractSqlParser extends ParserInterface with Logging { catch { case e: ParseCancellationException => // if we fail, parse with LL mode - tokenStream.reset() // rewind input stream + tokenStream.seek(0) // rewind input stream parser.reset() // Try Again. @@ -148,12 +149,33 @@ object CatalystSqlParser extends AbstractSqlParser { * the consume() function of the super class ANTLRStringStream. The LA() function is the lookahead * function and is purely used for matching lexical rules. This also means that the grammar will * only accept capitalized tokens in case it is run from other tools like antlrworks which do not - * have the ANTLRNoCaseStringStream implementation. + * have the UpperCaseCharStream implementation. */ -private[parser] class ANTLRNoCaseStringStream(input: String) extends ANTLRInputStream(input) { +private[parser] class UpperCaseCharStream(wrapped: CodePointCharStream) extends CharStream { + override def consume(): Unit = wrapped.consume + override def getSourceName(): String = wrapped.getSourceName + override def index(): Int = wrapped.index + override def mark(): Int = wrapped.mark + override def release(marker: Int): Unit = wrapped.release(marker) + override def seek(where: Int): Unit = wrapped.seek(where) + override def size(): Int = wrapped.size + + override def getText(interval: Interval): String = { + // ANTLR 4.7's CodePointCharStream implementations have bugs when + // getText() is called with an empty stream, or intervals where + // the start > end. See + // https://github.com/antlr/antlr4/commit/ac9f7530 for one fix + // that is not yet in a released ANTLR artifact. + if (size() > 0 && (interval.b - interval.a >= 0)) { + wrapped.getText(interval) + } else { + "" + } + } + override def LA(i: Int): Int = { - val la = super.LA(i) + val la = wrapped.LA(i) if (la == 0 || la == IntStream.EOF) la else Character.toUpperCase(la) } @@ -244,11 +266,12 @@ case object PostProcessor extends SqlBaseBaseListener { val parent = ctx.getParent parent.removeLastChild() val token = ctx.getChild(0).getPayload.asInstanceOf[Token] - parent.addChild(f(new CommonToken( + val newToken = new CommonToken( new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream), SqlBaseParser.IDENTIFIER, token.getChannel, token.getStartIndex + stripMargins, - token.getStopIndex - stripMargins))) + token.getStopIndex - stripMargins) + parent.addChild(new TerminalNodeImpl(f(newToken))) } } http://git-wip-us.apache.org/repos/asf/spark/blob/9f5c77ae/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index 9c1031e..9b127f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -32,7 +32,7 @@ object ParserUtils { /** Get the command which created the token. */ def command(ctx: ParserRuleContext): String = { val stream = ctx.getStart.getInputStream - stream.getText(Interval.of(0, stream.size())) + stream.getText(Interval.of(0, stream.size() - 1)) } def operationNotAllowed(message: String, ctx: ParserRuleContext): Nothing = { @@ -58,7 +58,7 @@ object ParserUtils { /** Get all the text which comes after the given token. */ def remainder(token: Token): String = { val stream = token.getInputStream - val interval = Interval.of(token.getStopIndex + 1, stream.size()) + val interval = Interval.of(token.getStopIndex + 1, stream.size() - 1) stream.getText(interval) } http://git-wip-us.apache.org/repos/asf/spark/blob/9f5c77ae/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala index d5748a4..768030f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.catalyst.parser -import org.antlr.v4.runtime.{CommonTokenStream, ParserRuleContext} +import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ @@ -57,7 +57,7 @@ class ParserUtilsSuite extends SparkFunSuite { } private def buildContext[T](command: String)(toResult: SqlBaseParser => T): T = { - val lexer = new SqlBaseLexer(new ANTLRNoCaseStringStream(command)) + val lexer = new SqlBaseLexer(new UpperCaseCharStream(CharStreams.fromString(command))) val tokenStream = new CommonTokenStream(lexer) val parser = new SqlBaseParser(tokenStream) toResult(parser) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
