hokein updated this revision to Diff 462448.
hokein added a comment.
Fix a bug caused by my change, and add test.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D130460/new/
https://reviews.llvm.org/D130460
Files:
clang-tools-extra/pseudo/include/clang-pseudo/Language.h
clang-tools-extra/pseudo/include/clang-pseudo/cxx/Recovery.h
clang-tools-extra/pseudo/lib/GLR.cpp
clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
clang-tools-extra/pseudo/lib/cxx/CXX.cpp
clang-tools-extra/pseudo/lib/cxx/Recovery.cpp
clang-tools-extra/pseudo/lib/cxx/cxx.bnf
clang-tools-extra/pseudo/test/cxx/recovery-declarations.cpp
clang-tools-extra/pseudo/unittests/CMakeLists.txt
clang-tools-extra/pseudo/unittests/CXXRecoveryTest.cpp
clang-tools-extra/pseudo/unittests/GLRTest.cpp
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -48,13 +48,13 @@
testing::UnorderedElementsAreArray(Parents));
}
-Token::Index recoverBraces(Token::Index Begin, const TokenStream &Code) {
- EXPECT_GT(Begin, 0u);
- const Token &Left = Code.tokens()[Begin - 1];
+Token::Index recoverBraces(const RecoveryParams &P) {
+ EXPECT_GT(P.Begin, 0u);
+ const Token &Left = P.Tokens.tokens()[P.Begin - 1];
EXPECT_EQ(Left.Kind, tok::l_brace);
if (const auto* Right = Left.pair()) {
EXPECT_EQ(Right->Kind, tok::r_brace);
- return Code.index(*Right);
+ return P.Tokens.index(*Right);
}
return Token::Invalid;
}
@@ -613,7 +613,7 @@
TestLang.Table = LRTable::buildSLR(TestLang.G);
TestLang.RecoveryStrategies.try_emplace(
extensionID("Skip"),
- [](Token::Index Start, const TokenStream &) { return Start; });
+ [](const RecoveryParams& P) { return P.Begin; });
clang::LangOptions LOptions;
TokenStream Tokens = cook(lex("foo", LOptions), LOptions);
@@ -641,7 +641,7 @@
TestLang.Table = LRTable::buildSLR(TestLang.G);
TestLang.RecoveryStrategies.try_emplace(
extensionID("AcceptAnyTokenInstead"),
- [](Token::Index Start, const TokenStream &Stream) { return Start + 1; });
+ [](const RecoveryParams& P) { return P.Begin + 1; });
const ForestNode &Parsed =
glrParse({Tokens, Arena, GSStack}, id("sentence"), TestLang);
@@ -660,9 +660,9 @@
)bnf");
TestLang.Table = LRTable::buildSLR(TestLang.G);
bool fallback_recovered = false;
- auto fallback = [&](Token::Index Start, const TokenStream & Code) {
+ auto fallback = [&](const RecoveryParams& P) {
fallback_recovered = true;
- return Code.tokens().size();
+ return P.Tokens.tokens().size();
};
TestLang.RecoveryStrategies.try_emplace(
extensionID("Fallback"),
@@ -694,7 +694,7 @@
TestLang.Table = LRTable::buildSLR(TestLang.G);
TestLang.RecoveryStrategies.try_emplace(
extensionID("Skip"),
- [](Token::Index Start, const TokenStream &) { return Start; });
+ [](const RecoveryParams& P) { return P.Begin; });
clang::LangOptions LOptions;
TokenStream Tokens = cook(lex("main", LOptions), LOptions);
Index: clang-tools-extra/pseudo/unittests/CXXRecoveryTest.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/unittests/CXXRecoveryTest.cpp
@@ -0,0 +1,110 @@
+//===--- CXXRecoveryTest.cpp - ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/Bracket.h"
+#include "clang-pseudo/Language.h"
+#include "clang-pseudo/Token.h"
+#include "clang-pseudo/cxx/Recovery.h"
+
+#include "clang/Basic/LangOptions.h"
+#include "llvm/Testing/Support/Annotations.h"
+#include "gtest/gtest.h"
+
+namespace clang::pseudo {
+
+namespace {
+
+std::pair<RecoveryParams, /*Expected End*/ Token::Index>
+extract(const llvm::Annotations &Test) {
+ assert(Test.points().size() == 2 && "Expect two ^s in the test annotations");
+ LangOptions Opts = clang::pseudo::genericLangOpts();
+ std::string Code = Test.code().str();
+ TokenStream Cook = cook(lex(Code, Opts), Opts);
+ pairBrackets(Cook);
+
+ RecoveryParams P{0, Token::Invalid, Cook};
+ auto StartOffset = [&](const Token &T) -> size_t {
+ return T.Data - Code.data();
+ };
+ for (const auto &T : Cook.tokens()) {
+ if (StartOffset(T) == Test.points().front()) {
+ P.Cursor = Cook.index(T);
+ break;
+ }
+ }
+ assert(P.Cursor != Token::Invalid && "no cursor position provided");
+ Token::Index Expect = Token::Invalid;
+ if (Test.points().back() > StartOffset(Cook.tokens().back()))
+ Expect = Cook.tokens().size(); // EOF
+ else {
+ for (const auto &T : Cook.tokens()) {
+ if (StartOffset(T) == Test.points().back()) {
+ Expect = Cook.index(T);
+ }
+ }
+ assert(Expect != Token::Invalid);
+ }
+ return {P, Expect};
+}
+
+TEST(CXXRecoveryTest, Declaration) {
+ // We have 3 different positions in the TokenStream:
+ // - Begin: where we start to parse the code (always 0 in the test)
+ // - Cursor: where the parse failed (specified by the first ^)
+ // - End: expected ends of the range (specified by the second ^)
+ const char *Tests[] = {
+ R"cpp(
+ namespace ^??? { }
+ ^Foo foo;
+ )cpp",
+ R"cpp(
+ auto lambda = [] ^xxx () xxx {} ;
+ ^Foo foo;
+ )cpp",
+ R"cpp(
+ LLVM_DISCARD int ^bar() {}
+ ^int;
+ )cpp",
+ // Recovery should at least consume 1 token.
+ R"cpp(
+ int
+ ^int
+ ^
+ )cpp",
+ // FIXME: we skip too much, Foo is a better end point.
+ R"cpp(
+ LLVM_DISCARD int ^bar() {}
+ Foo foo();
+ ^int;
+ )cpp",
+ R"cpp(
+ void Foo::AcquireLease() ^LOCKS_EXCLUDED(mutex_) {}
+
+ Foo abc();
+ ^
+ )cpp",
+ // FIXME: skipp too much, we should end at private.
+ R"cpp(
+ class Foo {
+ public:
+ void abc() ^ERRROR
+ private:
+ int a;
+ };
+ ^
+ )cpp",
+ };
+ for (const auto *T : Tests) {
+ llvm::Annotations Code(T);
+ const auto &[RP, ExpectedEnd] = extract(Code);
+ EXPECT_EQ(pseudo::cxx::recoverNextDeclaration(RP), ExpectedEnd)
+ << Code.code();
+ }
+}
+} // namespace
+} // namespace clang::pseudo
Index: clang-tools-extra/pseudo/unittests/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/unittests/CMakeLists.txt
+++ clang-tools-extra/pseudo/unittests/CMakeLists.txt
@@ -5,6 +5,7 @@
add_custom_target(ClangPseudoUnitTests)
add_unittest(ClangPseudoUnitTests ClangPseudoTests
BracketTest.cpp
+ CXXRecoveryTest.cpp
CXXTest.cpp
DirectiveTreeTest.cpp
DisambiguateTest.cpp
Index: clang-tools-extra/pseudo/test/cxx/recovery-declarations.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/test/cxx/recovery-declarations.cpp
@@ -0,0 +1,30 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
+
+this is not a valid declaration;
+// CHECK: declaration := <opaque>
+
+int x[0];
+// CHECK: simple-declaration := decl-specifier-seq init-declarator-list ;
+// CHECK-NEXT: ââdecl-specifier-seq~INT := tok[7]
+// CHECK-NEXT: ââinit-declarator-list~noptr-declarator := noptr-declarator [ constant-expression ]
+
+also_not_ok ??? @@@ (;
+// CHECK: declaration := <opaque>
+
+class Y{};
+// CHECK: simple-declaration := decl-specifier-seq ;
+// CHECK-NEXT: ââdecl-specifier-seq~class-specifier := class-head { }
+
+// FIXME: support recovery of incomplete scopes
+// namespace foo {
+
+class class class class function
+// CHECK: declaration := <opaque>
+
+export {
+;
+?
+// CHECK: declaration-seq := declaration-seq declaration
+// CHECK-NEXT: ââdeclaration-seq~; := tok[34]
+// CHECK-NEXT: ââdeclaration := <opaque>
+}
Index: clang-tools-extra/pseudo/lib/cxx/cxx.bnf
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -304,8 +304,8 @@
declaration-statement := block-declaration
# gram.dcl
-declaration-seq := declaration
-declaration-seq := declaration-seq declaration
+declaration-seq := declaration [recover=NextDeclaration]
+declaration-seq := declaration-seq declaration [recover=NextDeclaration]
declaration := block-declaration
declaration := nodeclspec-function-declaration
declaration := function-definition
@@ -554,7 +554,7 @@
class-key := CLASS
class-key := STRUCT
class-key := UNION
-member-specification := member-declaration member-specification_opt
+member-specification := member-declaration [recover=NextDeclaration] member-specification_opt
member-specification := access-specifier : member-specification_opt
member-declaration := decl-specifier-seq member-declarator-list_opt ;
member-declaration := member-declarator-list ;
Index: clang-tools-extra/pseudo/lib/cxx/Recovery.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/lib/cxx/Recovery.cpp
@@ -0,0 +1,121 @@
+//===--- Recovery.cpp - ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/cxx/Recovery.h"
+#include "clang-pseudo/Language.h"
+#include "clang/Basic/TokenKinds.h"
+
+namespace clang::pseudo::cxx {
+
+Token::Index recoverNextDeclaration(const RecoveryParams &P) {
+ if (P.Begin == P.Tokens.tokens().size())
+ return Token::Invalid;
+ // Declarations that don't end in semicolons:
+ // function-definition
+ // EXPORT {...}
+ // EXTERN string-literal {...}
+ // INLINE_opt NAMESPACE IDENTIFIER {...}
+ // INLINE_opt NAMESPACE {...}
+ // NAMESPACE enclosing-namespace-specifier :: INLINE_opt IDENTIFIER {...}
+ // Functions are hard to do anything about:
+ // - if the declarator is good and the braces are present, parsing succeeds
+ // - if the declarator is bad, it's hard to know it's a function
+ // - if the braces are missing, we don't know where to recover
+ // So we only try to detect the others.
+ bool AcceptBraces = false;
+ auto ConsiderForBraces = [&](tok::TokenKind K) {
+ if (K == tok::kw_export || K == tok::kw_extern || K == tok::kw_namespace)
+ AcceptBraces = true;
+ };
+
+ // Some tokens are pretty good indicators of a declaration starting, when
+ // they appear at the beginning of a line.
+ auto LikelyStartsDeclaration = [&](tok::TokenKind K) -> bool {
+ switch (K) {
+ // Things you might be declaring.
+ case tok::kw_template:
+ case tok::kw_using:
+ case tok::kw_namespace:
+ case tok::kw_typedef:
+ case tok::kw_class:
+ case tok::kw_struct:
+ case tok::kw_union:
+ case tok::kw_friend:
+ // Types & other decl-specifiers
+ // FIXME: consider IDENTIFIER
+ case tok::kw_void:
+ case tok::kw_auto:
+ case tok::kw_int:
+ case tok::kw_char:
+ case tok::kw_float:
+ case tok::kw_double:
+ case tok::kw_long:
+ case tok::kw_signed:
+ case tok::kw_unsigned:
+ case tok::kw_const:
+ case tok::kw_volatile:
+ case tok::kw_static:
+ case tok::kw_extern:
+ case tok::kw_inline:
+ case tok::kw_explicit:
+ case tok::kw_virtual:
+ // Modules
+ case tok::kw_import:
+ case tok::kw_export:
+ // Class protection
+ case tok::kw_public:
+ case tok::kw_private:
+ case tok::kw_protected:
+ return true;
+ default:
+ return false;
+ }
+ };
+
+ // The indentation level where the declaration started hints at where a new
+ // sibling declaration might start.
+ unsigned OrigIndent = P.Tokens.tokens()[P.Begin].Indent;
+
+ // Walk over tokens at the appropriate bracket nesting level.
+ for (const Token *T = &P.Tokens.tokens()[P.Begin]; T->Kind != tok::eof;
+ T = &T->next()) {
+ auto TIndex = P.Tokens.index(*T);
+ // Handle (paired) brackets.
+ if (const Token *PairedT = T->pair()) {
+ // Closing bracket not paired within the recovery range.
+ auto PairedTIndex = P.Tokens.index(*PairedT);
+ if (PairedT < T) {
+ assert(PairedTIndex < P.Begin && "Why didn't we skip this one?");
+ assert(TIndex >= P.Cursor &&
+ "How did we parse an unmatched closing bracket?!");
+ return TIndex;
+ }
+ // Opening bracket: is this a semicolonless statement?
+ if (T->Kind == tok::l_brace && AcceptBraces && PairedTIndex >= P.Cursor)
+ return PairedTIndex + 1;
+ // Skip over opening/closing bracket pair.
+ T = PairedT;
+ continue;
+ }
+ ConsiderForBraces(T->Kind);
+
+ // Semicolons terminate statements.
+ if (T->Kind == tok::semi && TIndex >= P.Cursor)
+ return TIndex + 1;
+
+ // Stop if it looks like a new declaration is starting on the next line.
+ if (TIndex > P.Cursor // ensure cursor is moving forward after recovery
+ && (TIndex > 0 && T->Line != T->prev().Line) // start on the next line.
+ && T->Indent == OrigIndent && LikelyStartsDeclaration(T->Kind))
+ return TIndex;
+ }
+ // If we reached EOF, then assume the declaration runs right to EOF.
+ return P.Tokens.tokens().size();
+}
+
+} // namespace clang::pseudo::cxx
Index: clang-tools-extra/pseudo/lib/cxx/CXX.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -9,6 +9,7 @@
#include "clang-pseudo/cxx/CXX.h"
#include "clang-pseudo/Forest.h"
#include "clang-pseudo/Language.h"
+#include "clang-pseudo/cxx/Recovery.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
#include "clang/Basic/CharInfo.h"
@@ -410,14 +411,14 @@
#undef SYMBOL_GUARD
}
-Token::Index recoverBrackets(Token::Index Begin, const TokenStream &Tokens) {
- assert(Begin > 0);
- const Token &Left = Tokens.tokens()[Begin - 1];
+Token::Index recoverBrackets(const RecoveryParams &P) {
+ assert(P.Begin > 0);
+ const Token &Left = P.Tokens.tokens()[P.Begin - 1];
assert(Left.Kind == tok::l_brace || Left.Kind == tok::l_paren ||
Left.Kind == tok::l_square);
if (const Token *Right = Left.pair()) {
- assert(Tokens.index(*Right) > Begin - 1);
- return Tokens.index(*Right);
+ assert(P.Tokens.index(*Right) > P.Begin - 1);
+ return P.Tokens.index(*Right);
}
return Token::Invalid;
}
@@ -425,6 +426,7 @@
llvm::DenseMap<ExtensionID, RecoveryStrategy> buildRecoveryStrategies() {
return {
{Extension::Brackets, recoverBrackets},
+ {Extension::NextDeclaration, recoverNextDeclaration},
};
}
Index: clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
===================================================================
--- clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
+++ clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
@@ -4,6 +4,7 @@
add_clang_library(clangPseudoCXX
CXX.cpp
+ Recovery.cpp
DEPENDS
cxx_gen
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -28,11 +28,12 @@
namespace {
Token::Index findRecoveryEndpoint(ExtensionID Strategy, Token::Index Begin,
+ Token::Index Cursor,
const TokenStream &Tokens,
const Language &Lang) {
assert(Strategy != 0);
if (auto S = Lang.RecoveryStrategies.lookup(Strategy))
- return S(Begin, Tokens);
+ return S({Begin, Cursor, Tokens});
return Token::Invalid;
}
@@ -142,7 +143,7 @@
break;
auto End = findRecoveryEndpoint(Option.Strategy, Option.Position,
- Params.Code, Lang);
+ TokenIndex, Params.Code, Lang);
// Recovery may not take the parse backwards.
if (End == Token::Invalid || End < TokenIndex)
continue;
Index: clang-tools-extra/pseudo/include/clang-pseudo/cxx/Recovery.h
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/include/clang-pseudo/cxx/Recovery.h
@@ -0,0 +1,33 @@
+//===--- Recovery.h - C++ specific recovery heuristics ------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_CXX_RECOVERY_H
+#define CLANG_PSEUDO_CXX_RECOVERY_H
+
+#include "clang-pseudo/Language.h"
+
+namespace clang::pseudo::cxx {
+
+// Heuristically finds the end of range of a declaration if parsing fails while
+// attempting to parse a declaration.
+//
+// Returns an *excluded* end of token range. Given the broken code below,
+//
+// int a = ? garbage ?;
+// double;
+//
+// We failed to parse the first declaration, the function returns the index of
+// `double` token. We skip up to the first `;` token and assume tokens in
+// [P.Begin, recoverNextDeclaration(P)) range was a declaration.
+//
+// Returns Token::Invalid if we can't find an end point.
+Token::Index recoverNextDeclaration(const RecoveryParams &P);
+
+} // namespace clang::pseudo::cxx
+
+#endif // CLANG_PSEUDO_CXX_RECOVERY_H
Index: clang-tools-extra/pseudo/include/clang-pseudo/Language.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -34,6 +34,14 @@
// Return true if the guard is satisfied.
using RuleGuard = llvm::function_ref<bool(const GuardParams &)>;
+struct RecoveryParams {
+ // The position in the stream where the node being recovered will start.
+ Token::Index Begin;
+ // The position in the stream where the parse failed.
+ // Begin <= Cursor <= End (for a valid recovery).
+ Token::Index Cursor;
+ const TokenStream &Tokens;
+};
// A recovery strategy determines a region of code to skip when parsing fails.
//
// For example, given `class-def := CLASS IDENT { body [recover=Brackets] }`,
@@ -43,7 +51,7 @@
// The provided index is the token where the skipped region begins.
// Returns the (excluded) end of the range, or Token::Invalid for no recovery.
using RecoveryStrategy =
- llvm::function_ref<Token::Index(Token::Index Start, const TokenStream &)>;
+ llvm::function_ref<Token::Index(const RecoveryParams &)>;
// Specify a language that can be parsed by the pseduoparser.
struct Language {
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits