akyrtzi updated this revision to Diff 431429.
akyrtzi marked an inline comment as done.
akyrtzi added a comment.
Add documentation comments for a couple of fields of `Scanner` in
`DependencyDirectivesScanner.cpp`
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D125487/new/
https://reviews.llvm.org/D125487
Files:
clang/include/clang/Lex/DependencyDirectivesScanner.h
clang/include/clang/Lex/Lexer.h
clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
clang/lib/Frontend/FrontendActions.cpp
clang/lib/Lex/DependencyDirectivesScanner.cpp
clang/lib/Lex/Lexer.cpp
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c
clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
clang/unittests/Tooling/DependencyScannerTest.cpp
Index: clang/unittests/Tooling/DependencyScannerTest.cpp
===================================================================
--- clang/unittests/Tooling/DependencyScannerTest.cpp
+++ clang/unittests/Tooling/DependencyScannerTest.cpp
@@ -204,51 +204,5 @@
EXPECT_EQ(convert_to_slash(Deps[5]), "/root/symlink.h");
}
-namespace dependencies {
-TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) {
- auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
- VFS->addFile("/mod.h", 0,
- llvm::MemoryBuffer::getMemBuffer("#include <foo.h>\n"
- "// hi there!\n"));
-
- DependencyScanningFilesystemSharedCache SharedCache;
- DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS);
-
- DepFS.enableDirectivesScanningOfAllFiles(); // Let's be explicit for clarity.
- auto StatusMinimized0 = DepFS.status("/mod.h");
- DepFS.disableDirectivesScanning("/mod.h");
- auto StatusFull1 = DepFS.status("/mod.h");
-
- EXPECT_TRUE(StatusMinimized0);
- EXPECT_TRUE(StatusFull1);
- EXPECT_EQ(StatusMinimized0->getSize(), 17u);
- EXPECT_EQ(StatusFull1->getSize(), 30u);
- EXPECT_EQ(StatusMinimized0->getName(), StringRef("/mod.h"));
- EXPECT_EQ(StatusFull1->getName(), StringRef("/mod.h"));
-}
-
-TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) {
- auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
- VFS->addFile("/mod.h", 0,
- llvm::MemoryBuffer::getMemBuffer("#include <foo.h>\n"
- "// hi there!\n"));
-
- DependencyScanningFilesystemSharedCache SharedCache;
- DependencyScanningWorkerFilesystem DepFS(SharedCache, VFS);
-
- DepFS.disableDirectivesScanning("/mod.h");
- auto StatusFull0 = DepFS.status("/mod.h");
- DepFS.enableDirectivesScanningOfAllFiles();
- auto StatusMinimized1 = DepFS.status("/mod.h");
-
- EXPECT_TRUE(StatusFull0);
- EXPECT_TRUE(StatusMinimized1);
- EXPECT_EQ(StatusFull0->getSize(), 30u);
- EXPECT_EQ(StatusMinimized1->getSize(), 17u);
- EXPECT_EQ(StatusFull0->getName(), StringRef("/mod.h"));
- EXPECT_EQ(StatusMinimized1->getName(), StringRef("/mod.h"));
-}
-
-} // end namespace dependencies
} // end namespace tooling
} // end namespace clang
Index: clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
===================================================================
--- clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -14,39 +14,58 @@
using namespace clang;
using namespace clang::dependency_directives_scan;
-static bool minimizeSourceToDependencyDirectives(StringRef Input,
- SmallVectorImpl<char> &Out) {
- SmallVector<dependency_directives_scan::Directive, 32> Directives;
- return scanSourceForDependencyDirectives(Input, Out, Directives);
+static bool minimizeSourceToDependencyDirectives(
+ StringRef Input, SmallVectorImpl<char> &Out,
+ SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
+ SmallVectorImpl<Directive> &Directives) {
+ Out.clear();
+ Tokens.clear();
+ Directives.clear();
+ if (scanSourceForDependencyDirectives(Input, Tokens, Directives))
+ return true;
+
+ raw_svector_ostream OS(Out);
+ printDependencyDirectivesAsSource(Input, Directives, OS);
+ if (!Out.empty() && Out.back() != '\n')
+ Out.push_back('\n');
+ Out.push_back('\0');
+ Out.pop_back();
+
+ return false;
}
-static bool
-minimizeSourceToDependencyDirectives(StringRef Input,
- SmallVectorImpl<char> &Out,
- SmallVectorImpl<Directive> &Directives) {
- return scanSourceForDependencyDirectives(Input, Out, Directives);
+static bool minimizeSourceToDependencyDirectives(StringRef Input,
+ SmallVectorImpl<char> &Out) {
+ SmallVector<dependency_directives_scan::Token, 16> Tokens;
+ SmallVector<Directive, 32> Directives;
+ return minimizeSourceToDependencyDirectives(Input, Out, Tokens, Directives);
}
namespace {
TEST(MinimizeSourceToDependencyDirectivesTest, Empty) {
SmallVector<char, 128> Out;
+ SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
- ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Directives));
+ ASSERT_FALSE(
+ minimizeSourceToDependencyDirectives("", Out, Tokens, Directives));
EXPECT_TRUE(Out.empty());
+ EXPECT_TRUE(Tokens.empty());
ASSERT_EQ(1u, Directives.size());
ASSERT_EQ(pp_eof, Directives.back().Kind);
- ASSERT_FALSE(
- minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Directives));
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens,
+ Directives));
EXPECT_TRUE(Out.empty());
+ EXPECT_TRUE(Tokens.empty());
ASSERT_EQ(1u, Directives.size());
ASSERT_EQ(pp_eof, Directives.back().Kind);
}
-TEST(MinimizeSourceToDependencyDirectivesTest, AllDirectives) {
+TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
SmallVector<char, 128> Out;
+ SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
ASSERT_FALSE(
@@ -71,7 +90,7 @@
"#pragma include_alias(<A>, <B>)\n"
"export module m;\n"
"import m;\n",
- Out, Directives));
+ Out, Tokens, Directives));
EXPECT_EQ(pp_define, Directives[0].Kind);
EXPECT_EQ(pp_undef, Directives[1].Kind);
EXPECT_EQ(pp_endif, Directives[2].Kind);
@@ -91,19 +110,28 @@
EXPECT_EQ(pp_pragma_push_macro, Directives[16].Kind);
EXPECT_EQ(pp_pragma_pop_macro, Directives[17].Kind);
EXPECT_EQ(pp_pragma_include_alias, Directives[18].Kind);
- EXPECT_EQ(cxx_export_decl, Directives[19].Kind);
- EXPECT_EQ(cxx_module_decl, Directives[20].Kind);
- EXPECT_EQ(cxx_import_decl, Directives[21].Kind);
- EXPECT_EQ(pp_eof, Directives[22].Kind);
+ EXPECT_EQ(cxx_export_module_decl, Directives[19].Kind);
+ EXPECT_EQ(cxx_import_decl, Directives[20].Kind);
+ EXPECT_EQ(pp_eof, Directives[21].Kind);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, EmptyHash) {
+ SmallVector<char, 128> Out;
+
+ ASSERT_FALSE(
+ minimizeSourceToDependencyDirectives("#\n#define MACRO a\n", Out));
+ EXPECT_STREQ("#define MACRO a\n", Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest, Define) {
SmallVector<char, 128> Out;
+ SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
- ASSERT_FALSE(
- minimizeSourceToDependencyDirectives("#define MACRO", Out, Directives));
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO", Out,
+ Tokens, Directives));
EXPECT_STREQ("#define MACRO\n", Out.data());
+ ASSERT_EQ(4u, Tokens.size());
ASSERT_EQ(2u, Directives.size());
ASSERT_EQ(pp_define, Directives.front().Kind);
}
@@ -144,25 +172,25 @@
ASSERT_FALSE(minimizeSourceToDependencyDirectives(
"#define MACRO con tent ", Out));
- EXPECT_STREQ("#define MACRO con tent\n", Out.data());
+ EXPECT_STREQ("#define MACRO con tent\n", Out.data());
ASSERT_FALSE(minimizeSourceToDependencyDirectives(
"#define MACRO() con tent ", Out));
- EXPECT_STREQ("#define MACRO() con tent\n", Out.data());
+ EXPECT_STREQ("#define MACRO() con tent\n", Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) {
SmallVector<char, 128> Out;
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out));
- EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+ EXPECT_STREQ("#define MACRO((a))\n", Out.data());
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out));
- EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+ EXPECT_STREQ("#define MACRO(\n", Out.data());
ASSERT_FALSE(
minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out));
- EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+ EXPECT_STREQ("#define MACRO(a*b)\n", Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) {
@@ -170,19 +198,19 @@
ASSERT_FALSE(minimizeSourceToDependencyDirectives(
"#define MACRO(\t)\tcon \t tent\t", Out));
- EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data());
+ EXPECT_STREQ("#define MACRO() con tent\n", Out.data());
ASSERT_FALSE(minimizeSourceToDependencyDirectives(
"#define MACRO(\f)\fcon \f tent\f", Out));
- EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data());
+ EXPECT_STREQ("#define MACRO() con tent\n", Out.data());
ASSERT_FALSE(minimizeSourceToDependencyDirectives(
"#define MACRO(\v)\vcon \v tent\v", Out));
- EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data());
+ EXPECT_STREQ("#define MACRO() con tent\n", Out.data());
ASSERT_FALSE(minimizeSourceToDependencyDirectives(
"#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out));
- EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data());
+ EXPECT_STREQ("#define MACRO con tent\n", Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) {
@@ -255,25 +283,27 @@
TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) {
SmallVector<char, 128> Out;
- ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out));
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define 0\n", Out));
}
TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) {
SmallVector<char, 128> Out;
- ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out));
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define &\n", Out));
}
TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) {
SmallVector<char, 128> Out;
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out));
- EXPECT_STREQ("#define AND &\n", Out.data());
+ EXPECT_STREQ("#define AND&\n", Out.data());
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n"
"&\n",
Out));
- EXPECT_STREQ("#define AND &\n", Out.data());
+ EXPECT_STREQ("#define AND\\\n"
+ "&\n",
+ Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) {
@@ -303,6 +333,14 @@
Out.data());
}
+TEST(MinimizeSourceToDependencyDirectivesTest, CommentSlashSlashStar) {
+ SmallVector<char, 128> Out;
+
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+ "#define MACRO 1 //* blah */\n", Out));
+ EXPECT_STREQ("#define MACRO 1\n", Out.data());
+}
+
TEST(MinimizeSourceToDependencyDirectivesTest, Ifdef) {
SmallVector<char, 128> Out;
@@ -481,6 +519,9 @@
ASSERT_FALSE(
minimizeSourceToDependencyDirectives("#__include_macros <A>\n", Out));
EXPECT_STREQ("#__include_macros <A>\n", Out.data());
+
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include MACRO\n", Out));
+ EXPECT_STREQ("#include MACRO\n", Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) {
@@ -507,8 +548,9 @@
SmallVector<char, 128> Out;
ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
- ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
- ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+ ASSERT_FALSE(
+ minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
}
TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {
@@ -559,7 +601,8 @@
"#define GUARD\n"
"#endif\n",
Out));
- EXPECT_STREQ("#ifndef GUARD\n"
+ EXPECT_STREQ("#if\\\n"
+ "ndef GUARD\n"
"#define GUARD\n"
"#endif\n",
Out.data());
@@ -567,12 +610,16 @@
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
"RD\n",
Out));
- EXPECT_STREQ("#define GUARD\n", Out.data());
+ EXPECT_STREQ("#define GUA\\\n"
+ "RD\n",
+ Out.data());
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r"
"RD\n",
Out));
- EXPECT_STREQ("#define GUARD\n", Out.data());
+ EXPECT_STREQ("#define GUA\\\r"
+ "RD\n",
+ Out.data());
ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
" RD\n",
@@ -588,7 +635,10 @@
"2 + \\\t\n"
"3\n",
Out));
- EXPECT_STREQ("#define A 1 + 2 + 3\n", Out.data());
+ EXPECT_STREQ("#define A 1+\\ \n"
+ "2+\\\t\n"
+ "3\n",
+ Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) {
@@ -682,6 +732,7 @@
TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
SmallVector<char, 128> Out;
+ SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
StringRef Source = R"(// comment
@@ -689,7 +740,8 @@
// another comment
#include <test.h>
)";
- ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives));
+ ASSERT_FALSE(
+ minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
EXPECT_STREQ("#pragma once\n#include <test.h>\n", Out.data());
ASSERT_EQ(Directives.size(), 3u);
EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_pragma_once);
@@ -700,7 +752,7 @@
#include <test.h>
)";
ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
- EXPECT_STREQ("#pragma once\n#include <test.h>\n", Out.data());
+ EXPECT_STREQ("#pragma once extra tokens\n#include <test.h>\n", Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest,
@@ -755,11 +807,12 @@
Source = "#define X \"\\ \r\nx\n#include <x>\n";
ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
- EXPECT_STREQ("#define X \"\\ \r\nx\n#include <x>\n", Out.data());
+ EXPECT_STREQ("#define X\"\\ \r\nx\n#include <x>\n", Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) {
SmallVector<char, 128> Out;
+ SmallVector<dependency_directives_scan::Token, 4> Tokens;
SmallVector<Directive, 4> Directives;
StringRef Source = R"(
@@ -789,16 +842,17 @@
import f(->a = 3);
}
)";
- ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out, Directives));
- EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;\n"
- "export import :l [[rename]];\n"
- "import <<= 3;\nimport a b d e d e f e;\n"
- "import foo [[no_unique_address]];\nimport foo();\n"
- "import f(:sefse);\nimport f(->a = 3);\n",
+ ASSERT_FALSE(
+ minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
+ EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;"
+ "exp\\\nort import:l[[rename]];"
+ "import<<=3;import a b d e d e f e;"
+ "import foo[[no_unique_address]];import foo();"
+ "import f(:sefse);import f(->a=3);\n",
Out.data());
- ASSERT_EQ(Directives.size(), 12u);
- EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_include);
- EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::cxx_module_decl);
+ ASSERT_EQ(Directives.size(), 10u);
+ EXPECT_EQ(Directives[0].Kind, pp_include);
+ EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl);
}
} // end anonymous namespace
Index: clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c
===================================================================
--- clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c
+++ clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c
@@ -15,7 +15,7 @@
#pragma include_alias(<string>, "mystring.h")
// CHECK: #pragma once
-// CHECK-NEXT: #pragma push_macro( "MYMACRO" )
+// CHECK-NEXT: #pragma push_macro("MYMACRO")
// CHECK-NEXT: #pragma pop_macro("MYMACRO")
// CHECK-NEXT: #pragma clang module import mymodule
// CHECK-NEXT: #pragma include_alias(<string>, "mystring.h")
Index: clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
===================================================================
--- clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
+++ clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
@@ -1,3 +1,4 @@
-// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+// RUN: %clang_cc1 -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s
-#define 0 0 // expected-error {{macro name must be an identifier}}
+#define 0 0
+// CHECK: #define 0 0
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -182,20 +182,6 @@
// Use the dependency scanning optimized file system if requested to do so.
if (DepFS) {
- DepFS->enableDirectivesScanningOfAllFiles();
- // Don't minimize any files that contributed to prebuilt modules. The
- // implicit build validates the modules by comparing the reported sizes of
- // their inputs to the current state of the filesystem. Minimization would
- // throw this mechanism off.
- for (const auto &File : PrebuiltModulesInputFiles)
- DepFS->disableDirectivesScanning(File.getKey());
- // Don't minimize any files that were explicitly passed in the build
- // settings and that might be opened.
- for (const auto &E : ScanInstance.getHeaderSearchOpts().UserEntries)
- DepFS->disableDirectivesScanning(E.Path);
- for (const auto &F : ScanInstance.getHeaderSearchOpts().VFSOverlayFiles)
- DepFS->disableDirectivesScanning(F);
-
// Support for virtual file system overlays on top of the caching
// filesystem.
FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
-#include "clang/Lex/DependencyDirectivesScanner.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/Threading.h"
@@ -44,48 +43,41 @@
EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
if (Entry.isError() || Entry.isDirectory() || Disable ||
- !shouldScanForDirectives(Filename, Entry.getUniqueID()))
- return EntryRef(/*Minimized=*/false, Filename, Entry);
+ !shouldScanForDirectives(Filename))
+ return EntryRef(Filename, Entry);
CachedFileContents *Contents = Entry.getCachedContents();
assert(Contents && "contents not initialized");
// Double-checked locking.
- if (Contents->MinimizedAccess.load())
- return EntryRef(/*Minimized=*/true, Filename, Entry);
+ if (Contents->DepDirectives.load())
+ return EntryRef(Filename, Entry);
std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
// Double-checked locking.
- if (Contents->MinimizedAccess.load())
- return EntryRef(/*Minimized=*/true, Filename, Entry);
+ if (Contents->DepDirectives.load())
+ return EntryRef(Filename, Entry);
- llvm::SmallString<1024> MinimizedFileContents;
- // Minimize the file down to directives that might affect the dependencies.
- SmallVector<dependency_directives_scan::Directive, 64> Tokens;
+ SmallVector<dependency_directives_scan::Directive, 64> Directives;
+ // Scan the file for preprocessor directives that might affect the
+ // dependencies.
if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
- MinimizedFileContents, Tokens)) {
+ Contents->DepDirectiveTokens,
+ Directives)) {
+ Contents->DepDirectiveTokens.clear();
// FIXME: Propagate the diagnostic if desired by the client.
- // Use the original file if the minimization failed.
- Contents->MinimizedStorage =
- llvm::MemoryBuffer::getMemBuffer(*Contents->Original);
- Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
- return EntryRef(/*Minimized=*/true, Filename, Entry);
+ Contents->DepDirectives.store(new Optional<DependencyDirectivesTy>());
+ return EntryRef(Filename, Entry);
}
- // The contents produced by the minimizer must be null terminated.
- assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
- "not null terminated contents");
-
- Contents->MinimizedStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
- std::move(MinimizedFileContents));
- // This function performed double-checked locking using `MinimizedAccess`.
- // Assigning it must be the last thing this function does. If we were to
- // assign it before `PPSkippedRangeMapping`, other threads may skip the
- // critical section (`MinimizedAccess != nullptr`) and access the mappings
- // that are about to be initialized, leading to a data race.
- Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
- return EntryRef(/*Minimized=*/true, Filename, Entry);
+ // This function performed double-checked locking using `DepDirectives`.
+ // Assigning it must be the last thing this function does; otherwise, other
+ // threads may skip the critical section (`DepDirectives != nullptr`),
+ // leading to a data race.
+ Contents->DepDirectives.store(
+ new Optional<DependencyDirectivesTy>(std::move(Directives)));
+ return EntryRef(Filename, Entry);
}
DependencyScanningFilesystemSharedCache::
@@ -192,19 +184,9 @@
return shouldScanForDirectivesBasedOnExtension(Filename);
}
-void DependencyScanningWorkerFilesystem::disableDirectivesScanning(
- StringRef Filename) {
- // Since we're not done setting up `NotToBeScanned` yet, we need to disable
- // directive scanning explicitly.
- if (llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(
- Filename, /*DisableDirectivesScanning=*/true))
- NotToBeScanned.insert(Result->getStatus().getUniqueID());
-}
-
bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
- StringRef Filename, llvm::sys::fs::UniqueID UID) {
- return shouldScanForDirectivesBasedOnExtension(Filename) &&
- !NotToBeScanned.contains(UID);
+ StringRef Filename) {
+ return shouldScanForDirectivesBasedOnExtension(Filename);
}
const CachedFileSystemEntry &
Index: clang/lib/Lex/Lexer.cpp
===================================================================
--- clang/lib/Lex/Lexer.cpp
+++ clang/lib/Lex/Lexer.cpp
@@ -226,13 +226,11 @@
return L;
}
-bool Lexer::skipOver(unsigned NumBytes) {
- IsAtPhysicalStartOfLine = true;
- IsAtStartOfLine = true;
- if ((BufferPtr + NumBytes) > BufferEnd)
- return true;
- BufferPtr += NumBytes;
- return false;
+void Lexer::seek(unsigned Offset, bool IsAtStartOfLine) {
+ this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
+ this->IsAtStartOfLine = IsAtStartOfLine;
+ assert((BufferStart + Offset) <= BufferEnd);
+ BufferPtr = BufferStart + Offset;
}
template <typename T> static void StringifyImpl(T &Str, char Quote) {
Index: clang/lib/Lex/DependencyDirectivesScanner.cpp
===================================================================
--- clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -18,83 +18,127 @@
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/MemoryBuffer.h"
-using namespace llvm;
using namespace clang;
using namespace clang::dependency_directives_scan;
+using namespace llvm;
namespace {
-struct Scanner {
- /// Minimized output.
- SmallVectorImpl<char> &Out;
- /// The known tokens encountered during the minimization.
- SmallVectorImpl<Directive> &Directives;
+struct DirectiveWithTokens {
+ DirectiveKind Kind;
+ unsigned NumTokens;
- Scanner(SmallVectorImpl<char> &Out, SmallVectorImpl<Directive> &Directives,
- StringRef Input, DiagnosticsEngine *Diags,
- SourceLocation InputSourceLoc)
- : Out(Out), Directives(Directives), Input(Input), Diags(Diags),
- InputSourceLoc(InputSourceLoc) {}
+ DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens)
+ : Kind(Kind), NumTokens(NumTokens) {}
+};
+
+/// Does an efficient "scan" of the sources to detect the presence of
+/// preprocessor (or module import) directives and collects the raw lexed tokens
+/// for those directives so that the \p Lexer can "replay" them when the file is
+/// included.
+///
+/// Note that the behavior of the raw lexer is affected by the language mode,
+/// while at this point we want to do a scan and collect tokens once,
+/// irrespective of the language mode that the file will get included in. To
+/// compensate for that, the \p Lexer, while "replaying", will adjust a token
+/// where appropriate, when it could affect the preprocessor's state.
+/// For example in a directive like
+///
+/// \code
+/// #if __has_cpp_attribute(clang::fallthrough)
+/// \endcode
+///
+/// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2
+/// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon'
+/// while in C++ mode.
+struct Scanner {
+ Scanner(StringRef Input,
+ SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
+ DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
+ : Input(Input), Tokens(Tokens), Diags(Diags),
+ InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
+ TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
+ Input.end()) {}
+
+ static LangOptions getLangOptsForDepScanning() {
+ LangOptions LangOpts;
+ // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
+ LangOpts.ObjC = true;
+ LangOpts.LineComment = true;
+ return LangOpts;
+ }
/// Lex the provided source and emit the directive tokens.
///
/// \returns True on error.
- bool scan();
+ bool scan(SmallVectorImpl<Directive> &Directives);
private:
- struct IdInfo {
- const char *Last;
- StringRef Name;
- };
+ /// Lexes next token and advances \p First and the \p Lexer.
+ LLVM_NODISCARD dependency_directives_scan::Token &
+ lexToken(const char *&First, const char *const End);
- /// Lex an identifier.
+ dependency_directives_scan::Token &lexIncludeFilename(const char *&First,
+ const char *const End);
+
+ /// Lexes next token and, if it is an identifier, returns its string;
+ /// otherwise it skips the current line and returns \p None.
///
- /// \pre First points at a valid identifier head.
- LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
- LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
- const char *const End);
+ /// In any case (whatever the token kind) \p First and the \p Lexer will
+ /// advance beyond the token.
+ LLVM_NODISCARD Optional<StringRef>
+ tryLexIdentifierOrSkipLine(const char *&First, const char *const End);
+
+ /// Used when it is certain that the next token is an identifier.
+ LLVM_NODISCARD StringRef lexIdentifier(const char *&First,
+ const char *const End);
+
+ /// Lexes next token and returns true iff it is an identifier that matches \p
+ /// Id, otherwise it skips the current line and returns false.
+ ///
+ /// In any case (whatever the token kind) \p First and the \p Lexer will
+ /// advance beyond the token.
+ LLVM_NODISCARD bool isNextIdentifierOrSkipLine(StringRef Id,
+ const char *&First,
+ const char *const End);
+
LLVM_NODISCARD bool scanImpl(const char *First, const char *const End);
LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
- LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
+ LLVM_NODISCARD bool lexDefine(const char *HashLoc, const char *&First,
+ const char *const End);
LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
- LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, StringRef Directive,
- const char *&First, const char *const End);
- Directive &pushDirective(DirectiveKind K) {
- Directives.emplace_back(K, Out.size());
- return Directives.back();
+ LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, const char *&First,
+ const char *const End);
+ LLVM_NODISCARD bool lexModuleDirectiveBody(DirectiveKind Kind,
+ const char *&First,
+ const char *const End);
+ void lexPPDirectiveBody(const char *&First, const char *const End);
+
+ DirectiveWithTokens &pushDirective(DirectiveKind Kind) {
+ Tokens.append(CurDirToks);
+ DirsWithToks.emplace_back(Kind, CurDirToks.size());
+ CurDirToks.clear();
+ return DirsWithToks.back();
}
void popDirective() {
- Out.resize(Directives.back().Offset);
- Directives.pop_back();
+ Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens);
}
DirectiveKind topDirective() const {
- return Directives.empty() ? pp_none : Directives.back().Kind;
+ return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind;
}
- Scanner &put(char Byte) {
- Out.push_back(Byte);
- return *this;
+ unsigned getOffsetAt(const char *CurPtr) const {
+ return CurPtr - Input.data();
}
- Scanner &append(StringRef S) { return append(S.begin(), S.end()); }
- Scanner &append(const char *First, const char *Last) {
- Out.append(First, Last);
- return *this;
- }
-
- void printToNewline(const char *&First, const char *const End);
- void printAdjacentModuleNameParts(const char *&First, const char *const End);
- LLVM_NODISCARD bool printAtImportBody(const char *&First,
- const char *const End);
- void printDirectiveBody(const char *&First, const char *const End);
- void printAdjacentMacroArgs(const char *&First, const char *const End);
- LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
/// Reports a diagnostic if the diagnostic engine is provided. Always returns
/// true at the end.
@@ -102,8 +146,20 @@
StringMap<char> SplitIds;
StringRef Input;
+ SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
DiagnosticsEngine *Diags;
SourceLocation InputSourceLoc;
+
+ /// Keeps track of the tokens for the currently lexed directive. Once a
+ /// directive is fully lexed and "committed" then the tokens get appended to
+ /// \p Tokens and \p CurDirToks is cleared for the next directive.
+ SmallVector<dependency_directives_scan::Token, 32> CurDirToks;
+ /// The directives that were lexed along with the number of tokens that each
+ /// directive contains. The tokens of all the directives are kept in \p Tokens
+ /// vector, in the same order as the directives order in \p DirsWithToks.
+ SmallVector<DirectiveWithTokens, 64> DirsWithToks;
+ LangOptions LangOpts;
+ Lexer TheLexer;
};
} // end anonymous namespace
@@ -112,7 +168,7 @@
if (!Diags)
return true;
assert(CurPtr >= Input.data() && "invalid buffer ptr");
- Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
+ Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err);
return true;
}
@@ -265,30 +321,6 @@
}
}
-static const char *findLastNonSpace(const char *First, const char *Last) {
- assert(First <= Last);
- while (First != Last && isHorizontalWhitespace(Last[-1]))
- --Last;
- return Last;
-}
-
-static const char *findLastNonSpaceNonBackslash(const char *First,
- const char *Last) {
- assert(First <= Last);
- while (First != Last &&
- (isHorizontalWhitespace(Last[-1]) || Last[-1] == '\\'))
- --Last;
- return Last;
-}
-
-static const char *findFirstTrailingSpace(const char *First, const char *Last) {
- const char *LastNonSpace = findLastNonSpace(First, Last);
- if (Last == LastNonSpace)
- return Last;
- assert(isHorizontalWhitespace(LastNonSpace[0]));
- return LastNonSpace + 1;
-}
-
static void skipLineComment(const char *&First, const char *const End) {
assert(First[0] == '/' && First[1] == '/');
First += 2;
@@ -396,67 +428,6 @@
skipLine(First, End);
}
-void Scanner::printToNewline(const char *&First, const char *const End) {
- while (First != End && !isVerticalWhitespace(*First)) {
- const char *Last = First;
- do {
- // Iterate over strings correctly to avoid comments and newlines.
- if (*Last == '"' || *Last == '\'' ||
- (*Last == '<' &&
- (topDirective() == pp_include || topDirective() == pp_import))) {
- if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
- skipRawString(Last, End);
- else
- skipString(Last, End);
- continue;
- }
- if (*Last != '/' || End - Last < 2) {
- ++Last;
- continue; // Gather the rest up to print verbatim.
- }
-
- if (Last[1] != '/' && Last[1] != '*') {
- ++Last;
- continue;
- }
-
- // Deal with "//..." and "/*...*/".
- append(First, findFirstTrailingSpace(First, Last));
- First = Last;
-
- if (Last[1] == '/') {
- skipLineComment(First, End);
- return;
- }
-
- put(' ');
- skipBlockComment(First, End);
- skipOverSpaces(First, End);
- Last = First;
- } while (Last != End && !isVerticalWhitespace(*Last));
-
- // Print out the string.
- const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last);
- if (Last == End || LastBeforeTrailingSpace == First ||
- LastBeforeTrailingSpace[-1] != '\\') {
- append(First, LastBeforeTrailingSpace);
- First = Last;
- skipNewline(First, End);
- return;
- }
-
- // Print up to the last character that's not a whitespace or backslash.
- // Then print exactly one space, which matters when tokens are separated by
- // a line continuation.
- append(First, findLastNonSpaceNonBackslash(First, Last));
- put(' ');
-
- First = Last;
- skipNewline(First, End);
- skipOverSpaces(First, End);
- }
-}
-
static void skipWhitespace(const char *&First, const char *const End) {
for (;;) {
assert(First <= End);
@@ -489,176 +460,134 @@
}
}
-void Scanner::printAdjacentModuleNameParts(const char *&First,
- const char *const End) {
- // Skip over parts of the body.
- const char *Last = First;
- do
- ++Last;
- while (Last != End && (isAsciiIdentifierContinue(*Last) || *Last == '.'));
- append(First, Last);
- First = Last;
-}
-
-bool Scanner::printAtImportBody(const char *&First, const char *const End) {
+bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
+ const char *const End) {
+ const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
for (;;) {
- skipWhitespace(First, End);
- if (First == End)
- return true;
-
- if (isVerticalWhitespace(*First)) {
- skipNewline(First, End);
- continue;
- }
-
- // Found a semicolon.
- if (*First == ';') {
- put(*First++).put('\n');
- return false;
- }
-
- // Don't handle macro expansions inside @import for now.
- if (!isAsciiIdentifierContinue(*First) && *First != '.')
- return true;
-
- printAdjacentModuleNameParts(First, End);
+ const dependency_directives_scan::Token &Tok = lexToken(First, End);
+ if (Tok.is(tok::eof))
+ return reportError(
+ DirectiveLoc,
+ diag::err_dep_source_scanner_missing_semi_after_at_import);
+ if (Tok.is(tok::semi))
+ break;
}
+ pushDirective(Kind);
+ skipWhitespace(First, End);
+ if (First == End)
+ return false;
+ if (!isVerticalWhitespace(*First))
+ return reportError(
+ DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
+ skipNewline(First, End);
+ return false;
}
-void Scanner::printDirectiveBody(const char *&First, const char *const End) {
- skipWhitespace(First, End); // Skip initial whitespace.
- printToNewline(First, End);
- while (Out.back() == ' ')
- Out.pop_back();
- put('\n');
-}
+dependency_directives_scan::Token &Scanner::lexToken(const char *&First,
+ const char *const End) {
+ clang::Token Tok;
+ TheLexer.LexFromRawLexer(Tok);
+ First = Input.data() + TheLexer.getCurrentBufferOffset();
+ assert(First <= End);
-LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
- const char *const End) {
- assert(isAsciiIdentifierContinue(*First) && "invalid identifer");
- const char *Last = First + 1;
- while (Last != End && isAsciiIdentifierContinue(*Last))
- ++Last;
- return Last;
+ unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
+ CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
+ Tok.getFlags());
+ return CurDirToks.back();
}
-LLVM_NODISCARD static const char *
-getIdentifierContinuation(const char *First, const char *const End) {
- if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
- return nullptr;
+dependency_directives_scan::Token &
+Scanner::lexIncludeFilename(const char *&First, const char *const End) {
+ clang::Token Tok;
+ TheLexer.LexIncludeFilename(Tok);
+ First = Input.data() + TheLexer.getCurrentBufferOffset();
+ assert(First <= End);
- ++First;
- skipNewline(First, End);
- if (First == End)
- return nullptr;
- return isAsciiIdentifierContinue(First[0]) ? First : nullptr;
-}
-
-Scanner::IdInfo Scanner::lexIdentifier(const char *First,
- const char *const End) {
- const char *Last = lexRawIdentifier(First, End);
- const char *Next = getIdentifierContinuation(Last, End);
- if (LLVM_LIKELY(!Next))
- return IdInfo{Last, StringRef(First, Last - First)};
-
- // Slow path, where identifiers are split over lines.
- SmallVector<char, 64> Id(First, Last);
- while (Next) {
- Last = lexRawIdentifier(Next, End);
- Id.append(Next, Last);
- Next = getIdentifierContinuation(Last, End);
- }
- return IdInfo{
- Last,
- SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
+ unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
+ CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
+ Tok.getFlags());
+ return CurDirToks.back();
}
-void Scanner::printAdjacentMacroArgs(const char *&First,
- const char *const End) {
- // Skip over parts of the body.
- const char *Last = First;
- do
- ++Last;
- while (Last != End &&
- (isAsciiIdentifierContinue(*Last) || *Last == '.' || *Last == ','));
- append(First, Last);
- First = Last;
+void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
+ while (true) {
+ const dependency_directives_scan::Token &Tok = lexToken(First, End);
+ if (Tok.is(tok::eod))
+ break;
+ }
}
-bool Scanner::printMacroArgs(const char *&First, const char *const End) {
- assert(*First == '(');
- put(*First++);
- for (;;) {
- skipWhitespace(First, End);
- if (First == End)
- return true;
+LLVM_NODISCARD Optional<StringRef>
+Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
+ const dependency_directives_scan::Token &Tok = lexToken(First, End);
+ if (Tok.isNot(tok::raw_identifier)) {
+ if (!Tok.is(tok::eod))
+ skipLine(First, End);
+ return None;
+ }
- if (*First == ')') {
- put(*First++);
- return false;
- }
+ bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;
+ if (LLVM_LIKELY(!NeedsCleaning))
+ return Input.slice(Tok.Offset, Tok.getEnd());
- // This is intentionally fairly liberal.
- if (!(isAsciiIdentifierContinue(*First) || *First == '.' || *First == ','))
- return true;
+ SmallString<64> Spelling;
+ Spelling.resize(Tok.Length);
- printAdjacentMacroArgs(First, End);
+ unsigned SpellingLength = 0;
+ const char *BufPtr = Input.begin() + Tok.Offset;
+ const char *AfterIdent = Input.begin() + Tok.getEnd();
+ while (BufPtr < AfterIdent) {
+ unsigned Size;
+ Spelling[SpellingLength++] =
+ Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
}
+
+ return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0)
+ .first->first();
}
-/// Looks for an identifier starting from Last.
-///
-/// Updates "First" to just past the next identifier, if any. Returns true iff
-/// the identifier matches "Id".
-bool Scanner::isNextIdentifier(StringRef Id, const char *&First,
- const char *const End) {
- skipWhitespace(First, End);
- if (First == End || !isAsciiIdentifierStart(*First))
- return false;
+StringRef Scanner::lexIdentifier(const char *&First, const char *const End) {
+ Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);
+ assert(Id.hasValue() && "expected identifier token");
+ return Id.getValue();
+}
- IdInfo FoundId = lexIdentifier(First, End);
- First = FoundId.Last;
- return FoundId.Name == Id;
+bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
+ const char *const End) {
+ if (Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End)) {
+ if (*FoundId == Id)
+ return true;
+ skipLine(First, End);
+ }
+ return false;
}
bool Scanner::lexAt(const char *&First, const char *const End) {
// Handle "@import".
- const char *ImportLoc = First++;
- if (!isNextIdentifier("import", First, End)) {
- skipLine(First, End);
- return false;
- }
- pushDirective(decl_at_import);
- append("@import ");
- if (printAtImportBody(First, End))
- return reportError(
- ImportLoc, diag::err_dep_source_scanner_missing_semi_after_at_import);
- skipWhitespace(First, End);
- if (First == End)
+
+ // Lex '@'.
+ const dependency_directives_scan::Token &AtTok = lexToken(First, End);
+ assert(AtTok.is(tok::at));
+ (void)AtTok;
+
+ if (!isNextIdentifierOrSkipLine("import", First, End))
return false;
- if (!isVerticalWhitespace(*First))
- return reportError(
- ImportLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
- skipNewline(First, End);
- return false;
+ return lexModuleDirectiveBody(decl_at_import, First, End);
}
bool Scanner::lexModule(const char *&First, const char *const End) {
- IdInfo Id = lexIdentifier(First, End);
- First = Id.Last;
+ StringRef Id = lexIdentifier(First, End);
bool Export = false;
- if (Id.Name == "export") {
+ if (Id == "export") {
Export = true;
- skipWhitespace(First, End);
- if (!isAsciiIdentifierContinue(*First)) {
- skipLine(First, End);
+ Optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End);
+ if (!NextId)
return false;
- }
- Id = lexIdentifier(First, End);
- First = Id.Last;
+ Id = *NextId;
}
- if (Id.Name != "module" && Id.Name != "import") {
+ if (Id != "module" && Id != "import") {
skipLine(First, End);
return false;
}
@@ -680,114 +609,51 @@
}
}
- if (Export) {
- pushDirective(cxx_export_decl);
- append("export ");
- }
+ TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false);
- if (Id.Name == "module")
- pushDirective(cxx_module_decl);
+ DirectiveKind Kind;
+ if (Id == "module")
+ Kind = Export ? cxx_export_module_decl : cxx_module_decl;
else
- pushDirective(cxx_import_decl);
- append(Id.Name);
- append(" ");
- printToNewline(First, End);
- append("\n");
- return false;
-}
-
-bool Scanner::lexDefine(const char *&First, const char *const End) {
- pushDirective(pp_define);
- append("#define ");
- skipWhitespace(First, End);
-
- if (!isAsciiIdentifierStart(*First))
- return reportError(First, diag::err_pp_macro_not_identifier);
+ Kind = Export ? cxx_export_import_decl : cxx_import_decl;
- IdInfo Id = lexIdentifier(First, End);
- const char *Last = Id.Last;
- append(Id.Name);
- if (Last == End)
- return false;
- if (*Last == '(') {
- size_t Size = Out.size();
- if (printMacroArgs(Last, End)) {
- // Be robust to bad macro arguments, since they can show up in disabled
- // code.
- Out.resize(Size);
- append("(/* invalid */\n");
- skipLine(Last, End);
- return false;
- }
- }
- skipWhitespace(Last, End);
- if (Last == End)
- return false;
- if (!isVerticalWhitespace(*Last))
- put(' ');
- printDirectiveBody(Last, End);
- First = Last;
- return false;
+ return lexModuleDirectiveBody(Kind, First, End);
}
bool Scanner::lexPragma(const char *&First, const char *const End) {
- // #pragma.
- skipWhitespace(First, End);
- if (First == End || !isAsciiIdentifierStart(*First))
+ Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
+ if (!FoundId)
return false;
- IdInfo FoundId = lexIdentifier(First, End);
- First = FoundId.Last;
- if (FoundId.Name == "once") {
- // #pragma once
- skipLine(First, End);
- pushDirective(pp_pragma_once);
- append("#pragma once\n");
- return false;
- }
- if (FoundId.Name == "push_macro") {
- // #pragma push_macro
- pushDirective(pp_pragma_push_macro);
- append("#pragma push_macro");
- printDirectiveBody(First, End);
- return false;
- }
- if (FoundId.Name == "pop_macro") {
- // #pragma pop_macro
- pushDirective(pp_pragma_pop_macro);
- append("#pragma pop_macro");
- printDirectiveBody(First, End);
- return false;
- }
- if (FoundId.Name == "include_alias") {
- // #pragma include_alias
- pushDirective(pp_pragma_include_alias);
- append("#pragma include_alias");
- printDirectiveBody(First, End);
+ StringRef Id = FoundId.getValue();
+ auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
+ .Case("once", pp_pragma_once)
+ .Case("push_macro", pp_pragma_push_macro)
+ .Case("pop_macro", pp_pragma_pop_macro)
+ .Case("include_alias", pp_pragma_include_alias)
+ .Default(pp_none);
+ if (Kind != pp_none) {
+ lexPPDirectiveBody(First, End);
+ pushDirective(Kind);
return false;
}
- if (FoundId.Name != "clang") {
+ if (Id != "clang") {
skipLine(First, End);
return false;
}
// #pragma clang.
- if (!isNextIdentifier("module", First, End)) {
- skipLine(First, End);
+ if (!isNextIdentifierOrSkipLine("module", First, End))
return false;
- }
// #pragma clang module.
- if (!isNextIdentifier("import", First, End)) {
- skipLine(First, End);
+ if (!isNextIdentifierOrSkipLine("import", First, End))
return false;
- }
// #pragma clang module import.
+ lexPPDirectiveBody(First, End);
pushDirective(pp_pragma_import);
- append("#pragma clang module import ");
- printDirectiveBody(First, End);
return false;
}
@@ -808,14 +674,13 @@
return false;
}
- return lexDefault(pp_endif, "endif", First, End);
+ return lexDefault(pp_endif, First, End);
}
-bool Scanner::lexDefault(DirectiveKind Kind, StringRef Directive,
- const char *&First, const char *const End) {
+bool Scanner::lexDefault(DirectiveKind Kind, const char *&First,
+ const char *const End) {
+ lexPPDirectiveBody(First, End);
pushDirective(Kind);
- put('#').append(Directive).put(' ');
- printDirectiveBody(First, End);
return false;
}
@@ -845,6 +710,14 @@
return false;
}
+ TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true);
+
+ auto ScEx1 = make_scope_exit([&]() {
+ /// Clear Scanner's CurDirToks before returning, in case we didn't push a
+ /// new directive.
+ CurDirToks.clear();
+ });
+
// Handle "@import".
if (*First == '@')
return lexAt(First, End);
@@ -853,25 +726,26 @@
return lexModule(First, End);
// Handle preprocessing directives.
- ++First; // Skip over '#'.
- skipWhitespace(First, End);
- if (First == End)
- return reportError(First, diag::err_pp_expected_eol);
+ TheLexer.setParsingPreprocessorDirective(true);
+ auto ScEx2 = make_scope_exit(
+ [&]() { TheLexer.setParsingPreprocessorDirective(false); });
- if (!isAsciiIdentifierStart(*First)) {
- skipLine(First, End);
+ // Lex '#'.
+ const dependency_directives_scan::Token &HashTok = lexToken(First, End);
+ assert(HashTok.is(tok::hash));
+ (void)HashTok;
+
+ Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
+ if (!FoundId)
return false;
- }
- // Figure out the token.
- IdInfo Id = lexIdentifier(First, End);
- First = Id.Last;
+ StringRef Id = FoundId.getValue();
- if (Id.Name == "pragma")
+ if (Id == "pragma")
return lexPragma(First, End);
- auto Kind = llvm::StringSwitch<DirectiveKind>(Id.Name)
+ auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
.Case("include", pp_include)
.Case("__include_macros", pp___include_macros)
.Case("define", pp_define)
@@ -888,18 +762,26 @@
.Case("endif", pp_endif)
.Default(pp_none);
if (Kind == pp_none) {
- skipDirective(Id.Name, First, End);
+ skipDirective(Id, First, End);
return false;
}
if (Kind == pp_endif)
return lexEndif(First, End);
- if (Kind == pp_define)
- return lexDefine(First, End);
+ switch (Kind) {
+ case pp_include:
+ case pp___include_macros:
+ case pp_include_next:
+ case pp_import:
+ lexIncludeFilename(First, End);
+ break;
+ default:
+ break;
+ }
// Everything else.
- return lexDefault(Kind, Id.Name, First, End);
+ return lexDefault(Kind, First, End);
}
static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
@@ -916,28 +798,65 @@
return false;
}
-bool Scanner::scan() {
+bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
bool Error = scanImpl(Input.begin(), Input.end());
if (!Error) {
- // Add a trailing newline and an EOF on success.
- if (!Out.empty() && Out.back() != '\n')
- Out.push_back('\n');
+ // Add an EOF on success.
pushDirective(pp_eof);
}
- // Null-terminate the output. This way the memory buffer that's passed to
- // Clang will not have to worry about the terminating '\0'.
- Out.push_back(0);
- Out.pop_back();
+ ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
+ for (const DirectiveWithTokens &DirWithToks : DirsWithToks) {
+ assert(RemainingTokens.size() >= DirWithToks.NumTokens);
+ Directives.emplace_back(DirWithToks.Kind,
+ RemainingTokens.take_front(DirWithToks.NumTokens));
+ RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens);
+ }
+ assert(RemainingTokens.empty());
+
return Error;
}
bool clang::scanSourceForDependencyDirectives(
- StringRef Input, SmallVectorImpl<char> &Output,
+ StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
SourceLocation InputSourceLoc) {
- Output.clear();
- Directives.clear();
- return Scanner(Output, Directives, Input, Diags, InputSourceLoc).scan();
+ return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
+}
+
+void clang::printDependencyDirectivesAsSource(
+ StringRef Source,
+ ArrayRef<dependency_directives_scan::Directive> Directives,
+ llvm::raw_ostream &OS) {
+ // Add a space separator where it is convenient for testing purposes.
+ auto needsSpaceSeparator =
+ [](tok::TokenKind Prev,
+ const dependency_directives_scan::Token &Tok) -> bool {
+ if (Prev == Tok.Kind)
+ return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
+ tok::r_square);
+ if (Prev == tok::raw_identifier &&
+ Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal,
+ tok::char_constant, tok::header_name))
+ return true;
+ if (Prev == tok::r_paren &&
+ Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal,
+ tok::char_constant, tok::unknown))
+ return true;
+ if (Prev == tok::comma &&
+ Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less))
+ return true;
+ return false;
+ };
+
+ for (const dependency_directives_scan::Directive &Directive : Directives) {
+ Optional<tok::TokenKind> PrevTokenKind;
+ for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {
+ if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok))
+ OS << ' ';
+ PrevTokenKind = Tok.Kind;
+ OS << Source.slice(Tok.Offset, Tok.getEnd());
+ }
+ }
}
Index: clang/lib/Frontend/FrontendActions.cpp
===================================================================
--- clang/lib/Frontend/FrontendActions.cpp
+++ clang/lib/Frontend/FrontendActions.cpp
@@ -1157,10 +1157,10 @@
SourceManager &SM = CI.getPreprocessor().getSourceManager();
llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(SM.getMainFileID());
- llvm::SmallString<1024> Output;
+ llvm::SmallVector<dependency_directives_scan::Token, 16> Tokens;
llvm::SmallVector<dependency_directives_scan::Directive, 32> Directives;
if (scanSourceForDependencyDirectives(
- FromFile.getBuffer(), Output, Directives, &CI.getDiagnostics(),
+ FromFile.getBuffer(), Tokens, Directives, &CI.getDiagnostics(),
SM.getLocForStartOfFile(SM.getMainFileID()))) {
assert(CI.getDiagnostics().hasErrorOccurred() &&
"no errors reported for failure");
@@ -1179,7 +1179,8 @@
}
return;
}
- llvm::outs() << Output;
+ printDependencyDirectivesAsSource(FromFile.getBuffer(), Directives,
+ llvm::outs());
}
void GetDependenciesByModuleNameAction::ExecuteAction() {
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
===================================================================
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -10,6 +10,7 @@
#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
#include "clang/Basic/LLVM.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Allocator.h"
@@ -21,21 +22,26 @@
namespace tooling {
namespace dependencies {
-/// Original and minimized contents of a cached file entry. Single instance can
+using DependencyDirectivesTy =
+ SmallVector<dependency_directives_scan::Directive, 20>;
+
+/// Contents and directive tokens of a cached file entry. Single instance can
/// be shared between multiple entries.
struct CachedFileContents {
- CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Original)
- : Original(std::move(Original)), MinimizedAccess(nullptr) {}
+ CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
+ : Original(std::move(Contents)), DepDirectives(nullptr) {}
/// Owning storage for the original contents.
std::unique_ptr<llvm::MemoryBuffer> Original;
/// The mutex that must be locked before mutating directive tokens.
std::mutex ValueLock;
- /// Owning storage for the minimized contents.
- std::unique_ptr<llvm::MemoryBuffer> MinimizedStorage;
+ SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens;
/// Accessor to the directive tokens that's atomic to avoid data races.
- std::atomic<llvm::MemoryBuffer *> MinimizedAccess;
+ /// \p CachedFileContents has ownership of the pointer.
+ std::atomic<const Optional<DependencyDirectivesTy> *> DepDirectives;
+
+ ~CachedFileContents() { delete DepDirectives.load(); }
};
/// An in-memory representation of a file system entity that is of interest to
@@ -82,13 +88,17 @@
/// \returns The scanned preprocessor directive tokens of the file that are
/// used to speed up preprocessing, if available.
- StringRef getDirectiveTokens() const {
+ Optional<ArrayRef<dependency_directives_scan::Directive>>
+ getDirectiveTokens() const {
assert(!isError() && "error");
- assert(!MaybeStat->isDirectory() && "not a file");
+ assert(!isDirectory() && "not a file");
assert(Contents && "contents not initialized");
- llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load();
- assert(Buffer && "not minimized");
- return Buffer->getBuffer();
+ if (auto *Directives = Contents->DepDirectives.load()) {
+ if (Directives->hasValue())
+ return ArrayRef<dependency_directives_scan::Directive>(
+ Directives->getValue());
+ }
+ return None;
}
/// \returns The error.
@@ -224,10 +234,6 @@
/// If the underlying entry is an opened file, this wrapper returns the file
/// contents and the scanned preprocessor directives.
class EntryRef {
- /// For entry that is an opened file, this bit signifies whether its contents
- /// are minimized.
- bool Minimized;
-
/// The filename used to access this entry.
std::string Filename;
@@ -235,8 +241,8 @@
const CachedFileSystemEntry &Entry;
public:
- EntryRef(bool Minimized, StringRef Name, const CachedFileSystemEntry &Entry)
- : Minimized(Minimized), Filename(Name), Entry(Entry) {}
+ EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
+ : Filename(Name), Entry(Entry) {}
llvm::vfs::Status getStatus() const {
llvm::vfs::Status Stat = Entry.getStatus();
@@ -255,8 +261,11 @@
return *this;
}
- StringRef getContents() const {
- return Minimized ? Entry.getDirectiveTokens() : Entry.getOriginalContents();
+ StringRef getContents() const { return Entry.getOriginalContents(); }
+
+ Optional<ArrayRef<dependency_directives_scan::Directive>>
+ getDirectiveTokens() const {
+ return Entry.getDirectiveTokens();
}
};
@@ -280,14 +289,9 @@
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
openFileForRead(const Twine &Path) override;
- /// Disable directives scanning of the given file.
- void disableDirectivesScanning(StringRef Filename);
- /// Enable directives scanning of all files.
- void enableDirectivesScanningOfAllFiles() { NotToBeScanned.clear(); }
-
private:
/// Check whether the file should be scanned for preprocessor directives.
- bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID);
+ bool shouldScanForDirectives(StringRef Filename);
/// Returns entry for the given filename.
///
@@ -377,8 +381,6 @@
/// The local cache is used by the worker thread to cache file system queries
/// locally instead of querying the global cache every time.
DependencyScanningFilesystemLocalCache LocalCache;
- /// The set of files that should not be scanned for PP directives.
- llvm::DenseSet<llvm::sys::fs::UniqueID> NotToBeScanned;
};
} // end namespace dependencies
Index: clang/include/clang/Lex/Lexer.h
===================================================================
--- clang/include/clang/Lex/Lexer.h
+++ clang/include/clang/Lex/Lexer.h
@@ -288,14 +288,8 @@
return BufferPtr - BufferStart;
}
- /// Skip over \p NumBytes bytes.
- ///
- /// If the skip is successful, the next token will be lexed from the new
- /// offset. The lexer also assumes that we skipped to the start of the line.
- ///
- /// \returns true if the skip failed (new offset would have been past the
- /// end of the buffer), false otherwise.
- bool skipOver(unsigned NumBytes);
+ /// Set the lexer's buffer pointer to \p Offset.
+ void seek(unsigned Offset, bool IsAtStartOfLine);
/// Stringify - Convert the specified string into a C string by i) escaping
/// '\\' and " characters and ii) replacing newline character(s) with "\\n".
Index: clang/include/clang/Lex/DependencyDirectivesScanner.h
===================================================================
--- clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -19,15 +19,41 @@
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
namespace clang {
+namespace tok {
+enum TokenKind : unsigned short;
+}
+
class DiagnosticsEngine;
namespace dependency_directives_scan {
+/// Token lexed as part of dependency directive scanning.
+struct Token {
+ /// Offset into the original source input.
+ unsigned Offset;
+ unsigned Length;
+ tok::TokenKind Kind;
+ unsigned short Flags;
+
+ Token(unsigned Offset, unsigned Length, tok::TokenKind Kind,
+ unsigned short Flags)
+ : Offset(Offset), Length(Length), Kind(Kind), Flags(Flags) {}
+
+ unsigned getEnd() const { return Offset + Length; }
+
+ bool is(tok::TokenKind K) const { return Kind == K; }
+ bool isNot(tok::TokenKind K) const { return Kind != K; }
+ bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
+ return is(K1) || is(K2);
+ }
+ template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const {
+ return is(K1) || isOneOf(Ks...);
+ }
+};
+
/// Represents the kind of preprocessor directive or a module declaration that
/// is tracked by the scanner in its token output.
enum DirectiveKind : uint8_t {
@@ -52,9 +78,10 @@
pp_else,
pp_endif,
decl_at_import,
- cxx_export_decl,
cxx_module_decl,
cxx_import_decl,
+ cxx_export_module_decl,
+ cxx_export_import_decl,
pp_eof,
};
@@ -62,35 +89,48 @@
/// scanning. It's used to track various preprocessor directives that could
/// potentially have an effect on the dependencies.
struct Directive {
+ ArrayRef<Token> Tokens;
+
/// The kind of directive.
DirectiveKind Kind = pp_none;
- /// Offset into the output byte stream of where the directive begins.
- int Offset = -1;
-
- Directive(DirectiveKind K, int Offset) : Kind(K), Offset(Offset) {}
+ Directive() = default;
+ Directive(DirectiveKind K, ArrayRef<Token> Tokens)
+ : Tokens(Tokens), Kind(K) {}
};
} // end namespace dependency_directives_scan
-/// Minimize the input down to the preprocessor directives that might have
+/// Scan the input for the preprocessor directives that might have
/// an effect on the dependencies for a compilation unit.
///
-/// This function deletes all non-preprocessor code, and strips anything that
-/// can't affect what gets included. It canonicalizes whitespace where
-/// convenient to stabilize the output against formatting changes in the input.
-///
-/// Clears the output vectors at the beginning of the call.
+/// This function ignores all non-preprocessor code and anything that
+/// can't affect what gets included.
///
/// \returns false on success, true on error. If the diagnostic engine is not
/// null, an appropriate error is reported using the given input location
-/// with the offset that corresponds to the minimizer's current buffer offset.
+/// with the offset that corresponds to the \p Input buffer offset.
bool scanSourceForDependencyDirectives(
- llvm::StringRef Input, llvm::SmallVectorImpl<char> &Output,
- llvm::SmallVectorImpl<dependency_directives_scan::Directive> &Directives,
+ StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
+ SmallVectorImpl<dependency_directives_scan::Directive> &Directives,
DiagnosticsEngine *Diags = nullptr,
SourceLocation InputSourceLoc = SourceLocation());
+/// Print the previously scanned dependency directives as minimized source text.
+///
+/// \param Source The original source text that the dependency directives were
+/// scanned from.
+/// \param Directives The previously scanned dependency
+/// directives.
+/// \param OS the stream to print the dependency directives on.
+///
+/// This is used primarily for testing purposes; during dependency scanning the
+/// \p Lexer uses the tokens directly, not their printed version.
+void printDependencyDirectivesAsSource(
+ StringRef Source,
+ ArrayRef<dependency_directives_scan::Directive> Directives,
+ llvm::raw_ostream &OS);
+
} // end namespace clang
#endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits