On Mon, Oct 10, 2022 at 11:13 AM Sam McCall <sam.mcc...@gmail.com> wrote: > > On Mon, 10 Oct 2022, 19:57 David Blaikie, <dblai...@gmail.com> wrote: >> >> Could the underlying API be made more robust to handle StringRefs >> rather than null terminated char* in the first place? (like >> SourceManager::getCharacterData could return StringRef? Presumably at >> some layer it already knows how long the file is - the underlying >> MemoryBuffer, in this case) > > Maybe... > > You could return a StringRef(Pos, EOF) efficiently. > However it's not a great fit for many of the existing (>150) callers: > - that are looking for the *endpoint* of some text
Yeah, I figured that was more what the API was about for better/worse. I guess it's not worth having two entry points, a lower-level one that returns StringRef and then the existing one that calls that and returns the data pointer from the StringRef... > - which want to run the lexer (the returned char* is implicitly > null-terminated, StringRef would obscure that guarantee) I guess by implication the lexer needs something null terminated? Be nice if that wasn't the case too... > - the function is marked as "this is very hot", though I doubt it matters > that much *nod* At least something to be careful about. > It also leads to some unnatural code at callsites such as this, because "from > the start of this entity until the end of the file" is an unnatural range to > have, it would probably be easy to mistake for "from the start to the end of > this entity". Yeah. :/ > I think what this function does is too low-level to make safe and obvious, > and unfortunately it's hard to build high-level things on top of it. Yeah :/ > (e.g. StringRef getTextRange(SourceRange) would be great to have, but > actually you it needs to run the lexer and assert nontrivial invariants due > to design decisions elsewhere in clang) > >> >> On Thu, Oct 6, 2022 at 2:39 AM Sam McCall via cfe-commits >> <cfe-commits@lists.llvm.org> wrote: >> > >> > >> > Author: Sam McCall >> > Date: 2022-10-06T11:38:55+02:00 >> > New Revision: 5d2d527c32da2081b814ef8b446bc3e037f74b0a >> > >> > URL: >> > https://github.com/llvm/llvm-project/commit/5d2d527c32da2081b814ef8b446bc3e037f74b0a >> > DIFF: >> > https://github.com/llvm/llvm-project/commit/5d2d527c32da2081b814ef8b446bc3e037f74b0a.diff >> > >> > LOG: [clangd] Avoid scanning up to end of file on each comment! >> > >> > Assigning char* (pointing at comment start) to StringRef was causing us >> > to scan the rest of the source file looking for the null terminator. >> > >> > This seems to be eating about 8% of our *total* CPU! >> > >> > While fixing this, factor out the common bits from the two places we're >> > parsing IWYU pragmas. >> > >> > Differential Revision: https://reviews.llvm.org/D135314 >> > >> > Added: >> > >> > >> > Modified: >> > clang-tools-extra/clangd/Headers.cpp >> > clang-tools-extra/clangd/Headers.h >> > clang-tools-extra/clangd/index/CanonicalIncludes.cpp >> > clang-tools-extra/clangd/unittests/HeadersTests.cpp >> > >> > Removed: >> > >> > >> > >> > ################################################################################ >> > diff --git a/clang-tools-extra/clangd/Headers.cpp >> > b/clang-tools-extra/clangd/Headers.cpp >> > index 5231a47487bc7..52b954e921620 100644 >> > --- a/clang-tools-extra/clangd/Headers.cpp >> > +++ b/clang-tools-extra/clangd/Headers.cpp >> > @@ -22,9 +22,17 @@ >> > namespace clang { >> > namespace clangd { >> > >> > -const char IWYUPragmaKeep[] = "// IWYU pragma: keep"; >> > -const char IWYUPragmaExport[] = "// IWYU pragma: export"; >> > -const char IWYUPragmaBeginExports[] = "// IWYU pragma: begin_exports"; >> > +llvm::Optional<StringRef> parseIWYUPragma(const char *Text) { >> > + // This gets called for every comment seen in the preamble, so it's >> > quite hot. >> > + constexpr llvm::StringLiteral IWYUPragma = "// IWYU pragma: "; >> > + if (strncmp(Text, IWYUPragma.data(), IWYUPragma.size())) >> > + return llvm::None; >> > + Text += IWYUPragma.size(); >> > + const char *End = Text; >> > + while (*End != 0 && *End != '\n') >> > + ++End; >> > + return StringRef(Text, End - Text); >> > +} >> > >> > class IncludeStructure::RecordHeaders : public PPCallbacks, >> > public CommentHandler { >> > @@ -129,10 +137,10 @@ class IncludeStructure::RecordHeaders : public >> > PPCallbacks, >> > } >> > >> > bool HandleComment(Preprocessor &PP, SourceRange Range) override { >> > - bool Err = false; >> > - llvm::StringRef Text = SM.getCharacterData(Range.getBegin(), &Err); >> > - if (Err) >> > + auto Pragma = parseIWYUPragma(SM.getCharacterData(Range.getBegin())); >> > + if (!Pragma) >> > return false; >> > + >> > if (inMainFile()) { >> > // Given: >> > // >> > @@ -150,8 +158,7 @@ class IncludeStructure::RecordHeaders : public >> > PPCallbacks, >> > // will know that the next inclusion is behind the IWYU pragma. >> > // FIXME: Support "IWYU pragma: begin_exports" and "IWYU pragma: >> > // end_exports". >> > - if (!Text.startswith(IWYUPragmaExport) && >> > - !Text.startswith(IWYUPragmaKeep)) >> > + if (!Pragma->startswith("export") && !Pragma->startswith("keep")) >> > return false; >> > unsigned Offset = SM.getFileOffset(Range.getBegin()); >> > LastPragmaKeepInMainFileLine = >> > @@ -161,8 +168,7 @@ class IncludeStructure::RecordHeaders : public >> > PPCallbacks, >> > // does not support them properly yet, so they will be not marked as >> > // unused. >> > // FIXME: Once IncludeCleaner supports export pragmas, remove this. >> > - if (!Text.startswith(IWYUPragmaExport) && >> > - !Text.startswith(IWYUPragmaBeginExports)) >> > + if (!Pragma->startswith("export") && >> > !Pragma->startswith("begin_exports")) >> > return false; >> > Out->HasIWYUExport.insert( >> > >> > *Out->getID(SM.getFileEntryForID(SM.getFileID(Range.getBegin())))); >> > >> > diff --git a/clang-tools-extra/clangd/Headers.h >> > b/clang-tools-extra/clangd/Headers.h >> > index ff3f063168325..ba72ad397bf8f 100644 >> > --- a/clang-tools-extra/clangd/Headers.h >> > +++ b/clang-tools-extra/clangd/Headers.h >> > @@ -35,6 +35,12 @@ namespace clangd { >> > /// Returns true if \p Include is literal include like "path" or <path>. >> > bool isLiteralInclude(llvm::StringRef Include); >> > >> > +/// If Text begins an Include-What-You-Use directive, returns it. >> > +/// Given "// IWYU pragma: keep", returns "keep". >> > +/// Input is a null-terminated char* as provided by SM.getCharacterData(). >> > +/// (This should not be StringRef as we do *not* want to scan for its >> > length). >> > +llvm::Optional<StringRef> parseIWYUPragma(const char *Text); >> > + >> > /// Represents a header file to be #include'd. >> > struct HeaderFile { >> > std::string File; >> > >> > diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp >> > b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp >> > index b0ae42a96ecf4..8568079ca1adb 100644 >> > --- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp >> > +++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp >> > @@ -17,8 +17,6 @@ >> > namespace clang { >> > namespace clangd { >> > namespace { >> > -const char IWYUPragma[] = "// IWYU pragma: private, include "; >> > - >> > const std::pair<llvm::StringRef, llvm::StringRef> IncludeMappings[] = { >> > {"include/__stddef_max_align_t.h", "<cstddef>"}, >> > {"include/__wmmintrin_aes.h", "<wmmintrin.h>"}, >> > @@ -712,17 +710,17 @@ collectIWYUHeaderMaps(CanonicalIncludes *Includes) { >> > PragmaCommentHandler(CanonicalIncludes *Includes) : >> > Includes(Includes) {} >> > >> > bool HandleComment(Preprocessor &PP, SourceRange Range) override { >> > - llvm::StringRef Text = >> > - Lexer::getSourceText(CharSourceRange::getCharRange(Range), >> > - PP.getSourceManager(), PP.getLangOpts()); >> > - if (!Text.consume_front(IWYUPragma)) >> > + auto Pragma = parseIWYUPragma( >> > + PP.getSourceManager().getCharacterData(Range.getBegin())); >> > + if (!Pragma || !Pragma->consume_front("private, include ")) >> > return false; >> > auto &SM = PP.getSourceManager(); >> > // We always insert using the spelling from the pragma. >> > if (auto *FE = SM.getFileEntryForID(SM.getFileID(Range.getBegin()))) >> > - Includes->addMapping( >> > - FE->getLastRef(), >> > - isLiteralInclude(Text) ? Text.str() : ("\"" + Text + >> > "\"").str()); >> > + Includes->addMapping(FE->getLastRef(), >> > + isLiteralInclude(*Pragma) >> > + ? Pragma->str() >> > + : ("\"" + *Pragma + "\"").str()); >> > return false; >> > } >> > >> > >> > diff --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp >> > b/clang-tools-extra/clangd/unittests/HeadersTests.cpp >> > index 32e4aea15490b..324d4b58a1ef1 100644 >> > --- a/clang-tools-extra/clangd/unittests/HeadersTests.cpp >> > +++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp >> > @@ -9,6 +9,7 @@ >> > #include "Headers.h" >> > >> > #include "Compiler.h" >> > +#include "Matchers.h" >> > #include "TestFS.h" >> > #include "TestTU.h" >> > #include "clang/Basic/TokenKinds.h" >> > @@ -30,6 +31,7 @@ namespace { >> > using ::testing::AllOf; >> > using ::testing::Contains; >> > using ::testing::ElementsAre; >> > +using ::testing::Eq; >> > using ::testing::IsEmpty; >> > using ::testing::Not; >> > using ::testing::UnorderedElementsAre; >> > @@ -445,6 +447,18 @@ TEST_F(HeadersTest, HasIWYUPragmas) { >> > EXPECT_FALSE(Includes.hasIWYUExport(getID("none.h", Includes))); >> > } >> > >> > +TEST(Headers, ParseIWYUPragma) { >> > + EXPECT_THAT(parseIWYUPragma("// IWYU pragma: keep"), >> > HasValue(Eq("keep"))); >> > + EXPECT_THAT(parseIWYUPragma("// IWYU pragma: keep\netc"), >> > + HasValue(Eq("keep"))); >> > + EXPECT_EQ(parseIWYUPragma("/* IWYU pragma: keep"), llvm::None) >> > + << "Only // comments supported!"; >> > + EXPECT_EQ(parseIWYUPragma("// IWYU pragma: keep"), llvm::None) >> > + << "Sensitive to whitespace"; >> > + EXPECT_EQ(parseIWYUPragma("// IWYU pragma:keep"), llvm::None) >> > + << "Sensitive to whitespace"; >> > +} >> > + >> > } // namespace >> > } // namespace clangd >> > } // namespace clang >> > >> > >> > >> > _______________________________________________ >> > cfe-commits mailing list >> > cfe-commits@lists.llvm.org >> > https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits