This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new fdf540f40fe [fix](Nereids) fix lexer Backtracking or Ambiguity cause of key word duplicate (#39590) (#39750) fdf540f40fe is described below commit fdf540f40fe80cc880d33e7826dee61c7e4f580d Author: LiBinfeng <46676950+libinfeng...@users.noreply.github.com> AuthorDate: Thu Aug 22 14:59:55 2024 +0800 [fix](Nereids) fix lexer Backtracking or Ambiguity cause of key word duplicate (#39590) (#39750) cherry-pick from master #39590 issue intro by #39416 --- .../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 36 ++-------------------- .../antlr4/org/apache/doris/nereids/DorisParser.g4 | 5 ++- .../apache/doris/nereids/parser/NereidsParser.java | 8 ++--- 3 files changed, 8 insertions(+), 41 deletions(-) diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 index 64d2e9ce8ca..8916c263898 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 @@ -47,22 +47,6 @@ lexer grammar DorisLexer; } } - /** - * This method will be called when we see '/*' and try to match it as a bracketed comment. - * If the next character is '+', it should be parsed as hint later, and we cannot match - * it as a bracketed comment. - * - * Returns true if the next character is '+'. - */ - public boolean isHint() { - int nextChar = _input.LA(1); - if (nextChar == '+') { - return true; - } else { - return false; - } - } - /** * This method will be called when the character stream ends and try to find out the * unclosed bracketed comment. @@ -72,19 +56,6 @@ lexer grammar DorisLexer; public void markUnclosedComment() { has_unclosed_bracketed_comment = true; } - - // This variable will hold the external state - private boolean channel2; - - // Method to set the external state - public void setChannel2(boolean value) { - this.channel2 = value; - } - - // Method to decide the channel based on external state - private boolean isChannel2() { - return this.channel2; - } } SEMICOLON: ';'; @@ -587,6 +558,7 @@ COLON: ':'; ARROW: '->'; HINT_START: '/*+'; HINT_END: '*/'; +COMMENT_START: '/*'; ATSIGN: '@'; DOUBLEATSIGN: '@@'; @@ -664,11 +636,7 @@ SIMPLE_COMMENT ; BRACKETED_COMMENT - : '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN) - ; - -HINT_WITH_CHANNEL - : {isChannel2()}? HINT_START .*? HINT_END -> channel(2) + : COMMENT_START ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(2) ; diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 9f65c14036b..62c5661589c 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -189,7 +189,7 @@ havingClause : HAVING booleanExpression ; -selectHint: HINT_START hintStatements+=hintStatement (COMMA? hintStatements+=hintStatement)* HINT_END; +selectHint: hintStatements+=hintStatement (COMMA? hintStatements+=hintStatement)* HINT_END; hintStatement : hintName=identifier (LEFT_PAREN parameters+=hintAssignment (COMMA? parameters+=hintAssignment)* RIGHT_PAREN)? @@ -602,6 +602,7 @@ nonReserved | COLLATION | COLUMNS | COMMENT + | COMMENT_START | COMMIT | COMMITTED | COMPACT @@ -685,6 +686,8 @@ nonReserved | HASH | HDFS | HELP + | HINT_END + | HINT_START | HISTOGRAM | HLL_UNION | HOSTNAME diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java index 7bf7c7737bf..4fa75083378 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java @@ -92,18 +92,15 @@ public class NereidsParser { Function<DorisParser, ParserRuleContext> parseFunction) { // parse hint first round DorisLexer hintLexer = new DorisLexer(new CaseInsensitiveStream(CharStreams.fromString(sql))); - hintLexer.setChannel2(true); CommonTokenStream hintTokenStream = new CommonTokenStream(hintLexer); Map<Integer, ParserRuleContext> selectHintMap = Maps.newHashMap(); Token hintToken = hintTokenStream.getTokenSource().nextToken(); while (hintToken != null && hintToken.getType() != DorisLexer.EOF) { - int tokenType = hintToken.getType(); - if (tokenType == DorisLexer.HINT_WITH_CHANNEL) { - String hintSql = sql.substring(hintToken.getStartIndex(), hintToken.getStopIndex() + 1); + if (hintToken.getChannel() == 2 && sql.charAt(hintToken.getStartIndex() + 2) == '+') { + String hintSql = sql.substring(hintToken.getStartIndex() + 3, hintToken.getStopIndex() + 1); DorisLexer newHintLexer = new DorisLexer(new CaseInsensitiveStream(CharStreams.fromString(hintSql))); - newHintLexer.setChannel2(false); CommonTokenStream newHintTokenStream = new CommonTokenStream(newHintLexer); DorisParser hintParser = new DorisParser(newHintTokenStream); ParserRuleContext hintContext = parseFunction.apply(hintParser); @@ -117,7 +114,6 @@ public class NereidsParser { /** toAst */ private ParserRuleContext toAst(String sql, Function<DorisParser, ParserRuleContext> parseFunction) { DorisLexer lexer = new DorisLexer(new CaseInsensitiveStream(CharStreams.fromString(sql))); - lexer.setChannel2(true); CommonTokenStream tokenStream = new CommonTokenStream(lexer); DorisParser parser = new DorisParser(tokenStream); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org