sammccall updated this revision to Diff 427581.
sammccall marked an inline comment as done.
sammccall added a comment.

rebase
add include to testcase
rename preprocess => strip-directives, and testcase
move print directivetree test to strip-directives.c
fix tests to not cook before parsing directivetree


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123243/new/

https://reviews.llvm.org/D123243

Files:
  clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
  clang-tools-extra/pseudo/lib/DirectiveTree.cpp
  clang-tools-extra/pseudo/test/lex.c
  clang-tools-extra/pseudo/test/strip-directives.c
  clang-tools-extra/pseudo/tool/ClangPseudo.cpp
  clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp

Index: clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp
+++ clang-tools-extra/pseudo/unittests/DirectiveTreeTest.cpp
@@ -27,14 +27,23 @@
 using testing::StrEq;
 using Chunk = DirectiveTree::Chunk;
 
-MATCHER_P2(tokensAre, TS, Tokens, "tokens are " + std::string(Tokens)) {
+// Matches text of a list of tokens against a string (joined with spaces).
+// e.g. EXPECT_THAT(Stream.tokens(), tokens("int main ( ) { }"));
+MATCHER_P(tokens, Tokens, "") {
   std::vector<llvm::StringRef> Texts;
-  for (const Token &Tok : TS.tokens(arg.Tokens))
+  for (const Token &Tok : arg)
     Texts.push_back(Tok.text());
   return Matcher<std::string>(StrEq(Tokens))
       .MatchAndExplain(llvm::join(Texts, " "), result_listener);
 }
 
+// Matches tokens covered by a directive chunk (with a Tokens property) against a
+// string, similar to tokens() above.
+// e.g. EXPECT_THAT(SomeDirective, tokensAre(Stream, "# include < vector >"));
+MATCHER_P2(tokensAre, TS, Tokens, "tokens are " + std::string(Tokens)) {
+  return testing::Matches(tokens(Tokens))(TS.tokens(arg.Tokens));
+}
+
 MATCHER_P(chunkKind, K, "") { return arg.kind() == K; }
 
 TEST(DirectiveTree, Parse) {
@@ -301,6 +310,45 @@
   }
 }
 
+TEST(DirectiveTree, StripDirectives) {
+  LangOptions Opts;
+  std::string Code = R"cpp(
+    #include <stddef.h>
+    a a a
+    #warning AAA
+    b b b
+    #if 1
+      c c c
+      #warning BBB
+      #if 0
+        d d d
+        #warning CC
+      #else
+        e e e
+      #endif
+      f f f
+      #if 0
+        g g g
+      #endif
+      h h h
+    #else
+      i i i
+    #endif
+    j j j
+  )cpp";
+  TokenStream S = lex(Code, Opts);
+
+  DirectiveTree Tree = DirectiveTree::parse(S);
+  chooseConditionalBranches(Tree, S);
+  EXPECT_THAT(Tree.stripDirectives(S).tokens(),
+              tokens("a a a b b b c c c e e e f f f h h h j j j"));
+
+  const DirectiveTree &Part =
+      ((const DirectiveTree::Conditional &)Tree.Chunks[4]).Branches[0].second;
+  EXPECT_THAT(Part.stripDirectives(S).tokens(),
+              tokens("c c c e e e f f f h h h"));
+}
+
 } // namespace
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/tool/ClangPseudo.cpp
===================================================================
--- clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Support/Signals.h"
 
 using clang::pseudo::Grammar;
+using clang::pseudo::TokenStream;
 using llvm::cl::desc;
 using llvm::cl::init;
 using llvm::cl::opt;
@@ -37,6 +38,9 @@
 static opt<bool>
     PrintDirectiveTree("print-directive-tree",
                       desc("Print directive structure of source code"));
+static opt<bool>
+    StripDirectives("strip-directives",
+                    desc("Strip directives and select conditional sections"));
 static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics"));
 static opt<bool> PrintForest("print-forest", desc("Print parse forest"));
 
@@ -58,22 +62,30 @@
   clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
   std::string SourceText;
   llvm::Optional<clang::pseudo::TokenStream> RawStream;
-  llvm::Optional<clang::pseudo::DirectiveTree> DirectiveStructure;
+  llvm::Optional<TokenStream> PreprocessedStream;
   llvm::Optional<clang::pseudo::TokenStream> ParseableStream;
   if (Source.getNumOccurrences()) {
     SourceText = readOrDie(Source);
     RawStream = clang::pseudo::lex(SourceText, LangOpts);
-    DirectiveStructure = clang::pseudo::DirectiveTree::parse(*RawStream);
-    clang::pseudo::chooseConditionalBranches(*DirectiveStructure, *RawStream);
+    TokenStream *Stream = RawStream.getPointer();
+
+    auto DirectiveStructure = clang::pseudo::DirectiveTree::parse(*RawStream);
+    clang::pseudo::chooseConditionalBranches(DirectiveStructure, *RawStream);
+
+    llvm::Optional<TokenStream> Preprocessed;
+    if (StripDirectives) {
+      Preprocessed = DirectiveStructure.stripDirectives(*Stream);
+      Stream = Preprocessed.getPointer();
+    }
 
-    if (PrintDirectiveTree)
-      llvm::outs() << DirectiveStructure;
     if (PrintSource)
-      RawStream->print(llvm::outs());
+      Stream->print(llvm::outs());
     if (PrintTokens)
-      llvm::outs() << RawStream;
+      llvm::outs() << *Stream;
+    if (PrintDirectiveTree)
+      llvm::outs() << DirectiveStructure;
 
-    ParseableStream = clang::pseudo::stripComments(cook(*RawStream, LangOpts));
+    ParseableStream = clang::pseudo::stripComments(cook(*Stream, LangOpts));
   }
 
   if (Grammar.getNumOccurrences()) {
Index: clang-tools-extra/pseudo/test/strip-directives.c
===================================================================
--- /dev/null
+++ clang-tools-extra/pseudo/test/strip-directives.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+int main() {
+#error This was inevitable...
+#if HELLO
+  printf("hello, world\n");
+  return 0;
+#else
+  abort();
+#endif
+}
+
+/* This comment gets lexed along with the input above! We just don't CHECK it.
+
+RUN: clang-pseudo -source %s -print-directive-tree | FileCheck %s -check-prefix=PPT --strict-whitespace
+     PPT: #include (7 tokens)
+PPT-NEXT: code (5 tokens)
+PPT-NEXT: #error (6 tokens)
+PPT-NEXT: #if (3 tokens) TAKEN
+PPT-NEXT:   code (8 tokens)
+PPT-NEXT: #else (2 tokens)
+PPT-NEXT:   code (4 tokens)
+PPT-NEXT: #endif (2 tokens)
+PPT-NEXT: code (2 tokens)
+                ^ including this block comment
+
+RUN: clang-pseudo -source %s -strip-directives -print-source | FileCheck %s --strict-whitespace
+     CHECK: int main() {
+CHECK-NEXT:   printf("hello, world\n");
+CHECK-NEXT:   return 0;
+CHECK-NEXT: }
+
+RUN: clang-pseudo -source %s -strip-directives -print-tokens | FileCheck %s --check-prefix=TOKEN
+     TOKEN: 0: raw_identifier 1:0 "int" flags=1
+TOKEN-NEXT: raw_identifier    1:0 "main"
+TOKEN-NEXT: l_paren           1:0 "("
+TOKEN-NEXT: r_paren           1:0 ")"
+TOKEN-NEXT: l_brace           1:0 "{"
+TOKEN-NEXT: raw_identifier    4:2 "printf" flags=1
+TOKEN-NEXT: l_paren           4:2 "("
+TOKEN-NEXT: string_literal    4:2 "\22hello, world\\n\22"
+TOKEN-NEXT: r_paren           4:2 ")"
+TOKEN-NEXT: semi              4:2 ";"
+TOKEN-NEXT: raw_identifier    5:2 "return" flags=1
+TOKEN-NEXT: numeric_constant  5:2 "0"
+TOKEN-NEXT: semi              5:2 ";"
+TOKEN-NEXT: r_brace           9:0 "}" flags=1
+
+*******************************************************************************/
+
Index: clang-tools-extra/pseudo/test/lex.c
===================================================================
--- clang-tools-extra/pseudo/test/lex.c
+++ clang-tools-extra/pseudo/test/lex.c
@@ -18,7 +18,7 @@
 SOURCE-NEXT: }
 
 RUN: clang-pseudo -source %s -print-tokens | FileCheck %s -check-prefix=TOKEN
-     TOKEN:   0: raw_identifier   0:0 "int" flags=1
+     TOKEN: 0: raw_identifier 0:0 "int" flags=1
 TOKEN-NEXT: raw_identifier   0:0 "is_debug"
 TOKEN-NEXT: l_paren          0:0 "("
 TOKEN-NEXT: r_paren          0:0 ")"
@@ -39,14 +39,4 @@
 TOKEN-NEXT: raw_identifier   5:0 "endif"
 TOKEN-NEXT: r_brace          6:0 "}" flags=1
 
-RUN: clang-pseudo -source %s -print-directive-tree | FileCheck %s -check-prefix=PPT --strict-whitespace
-     PPT: code (5 tokens)
-PPT-NEXT: #ifndef (3 tokens) TAKEN
-PPT-NEXT:   code (4 tokens)
-PPT-NEXT: #else (2 tokens)
-PPT-NEXT:   code (3 tokens)
-PPT-NEXT: #endif (2 tokens)
-PPT-NEXT: code (2 tokens)
-                ^ including this block comment
-
 *******************************************************************************/
Index: clang-tools-extra/pseudo/lib/DirectiveTree.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/DirectiveTree.cpp
+++ clang-tools-extra/pseudo/lib/DirectiveTree.cpp
@@ -347,5 +347,53 @@
   BranchChooser{Code}.choose(Tree);
 }
 
+namespace {
+class Preprocessor {
+  const TokenStream &In;
+  TokenStream &Out;
+
+public:
+  Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {}
+  ~Preprocessor() { Out.finalize(); }
+
+  void walk(const DirectiveTree &T) {
+    for (const auto &C : T.Chunks)
+      walk(C);
+  }
+
+  void walk(const DirectiveTree::Chunk &C) {
+    switch (C.kind()) {
+    case DirectiveTree::Chunk::K_Code:
+      return walk((const DirectiveTree::Code &)C);
+    case DirectiveTree::Chunk::K_Directive:
+      return walk((const DirectiveTree::Directive &)C);
+    case DirectiveTree::Chunk::K_Conditional:
+      return walk((const DirectiveTree::Conditional &)C);
+    case DirectiveTree::Chunk::K_Empty:
+      break;
+    }
+    llvm_unreachable("bad chunk kind");
+  }
+
+  void walk(const DirectiveTree::Code &C) {
+    for (const auto &Tok : In.tokens(C.Tokens))
+      Out.push(Tok);
+  }
+
+  void walk(const DirectiveTree::Directive &) {}
+
+  void walk(const DirectiveTree::Conditional &C) {
+    if (C.Taken)
+      walk(C.Branches[*C.Taken].second);
+  }
+};
+} // namespace
+
+TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const {
+  TokenStream Out;
+  Preprocessor(In, Out).walk(*this);
+  return Out;
+}
+
 } // namespace pseudo
 } // namespace clang
Index: clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/DirectiveTree.h
@@ -92,7 +92,11 @@
   /// Extract preprocessor structure by examining the raw tokens.
   static DirectiveTree parse(const TokenStream &);
 
-  // FIXME: allow deriving a preprocessed stream
+  /// Produce a parseable token stream by stripping all directive tokens.
+  ///
+  /// Conditional sections are replaced by the taken branch, if any.
+  /// This tree must describe the provided token stream.
+  TokenStream stripDirectives(const TokenStream &) const;
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &);
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Chunk &);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to