Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com>, Timm Bäder <tbae...@redhat.com> Message-ID: In-Reply-To: <llvm/llvm-project/pull/66514/cl...@github.com>
https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/66514 >From 259ce1a3febe0c3996ecd7e55b9e79381eba8f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 15 Sep 2023 15:51:39 +0200 Subject: [PATCH 01/10] [clang][Diagnostics] Highlight code snippets Add some primitive syntax highlighting to our code snippet output. --- .../clang/Frontend/CodeSnippetHighlighter.h | 46 +++++++ clang/include/clang/Frontend/TextDiagnostic.h | 2 + clang/lib/Frontend/CMakeLists.txt | 1 + clang/lib/Frontend/CodeSnippetHighlighter.cpp | 120 ++++++++++++++++++ clang/lib/Frontend/TextDiagnostic.cpp | 26 ++++ 5 files changed, 195 insertions(+) create mode 100644 clang/include/clang/Frontend/CodeSnippetHighlighter.h create mode 100644 clang/lib/Frontend/CodeSnippetHighlighter.cpp diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h new file mode 100644 index 000000000000000..776954b59e2e1a8 --- /dev/null +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -0,0 +1,46 @@ +//===--- CodeSnippetHighlighter.h - Code snippet highlighting ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H +#define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H + +#include "clang/Basic/LangOptions.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/raw_ostream.h" +#include <vector> + +namespace clang { + +struct StyleRange { + unsigned Start; + unsigned End; + const enum llvm::raw_ostream::Colors c; +}; + +class CodeSnippetHighlighter final { +public: + CodeSnippetHighlighter() = default; + + /// Produce StyleRanges for the given line. 
+ /// The returned vector contains non-overlapping style ranges. They are sorted + /// from beginning of the line to the end. + std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine, + const LangOptions &LangOpts); + +private: + bool Initialized = false; + /// Fills Keywords and Literals. + void ensureTokenData(); + + llvm::SmallSet<StringRef, 12> Keywords; + llvm::SmallSet<StringRef, 12> Literals; +}; + +} // namespace clang + +#endif diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 7eb0ab0cdc9bca8..59fd4d4f9408d48 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -15,6 +15,7 @@ #ifndef LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H #define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H +#include "clang/Frontend/CodeSnippetHighlighter.h" #include "clang/Frontend/DiagnosticRenderer.h" namespace clang { @@ -33,6 +34,7 @@ namespace clang { /// printing coming out of libclang. 
class TextDiagnostic : public DiagnosticRenderer { raw_ostream &OS; + CodeSnippetHighlighter SnippetHighlighter; public: TextDiagnostic(raw_ostream &OS, diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt index 1e5f0a859dfd568..f3547f771593093 100644 --- a/clang/lib/Frontend/CMakeLists.txt +++ b/clang/lib/Frontend/CMakeLists.txt @@ -42,6 +42,7 @@ add_clang_library(clangFrontend TextDiagnosticPrinter.cpp VerifyDiagnosticConsumer.cpp InterfaceStubFunctionsConsumer.cpp + CodeSnippetHighlighter.cpp DEPENDS ClangDriverOptions diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp new file mode 100644 index 000000000000000..829a533ad2692e5 --- /dev/null +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -0,0 +1,120 @@ + +#include "clang/Frontend/CodeSnippetHighlighter.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; + +void CodeSnippetHighlighter::ensureTokenData() { + if (Initialized) + return; + + // List of keywords, literals and types we want to highlight. + // These are best-effort, as is everything we do wrt. highlighting. 
+ Keywords.insert("_Static_assert"); + Keywords.insert("auto"); + Keywords.insert("concept"); + Keywords.insert("const"); + Keywords.insert("consteval"); + Keywords.insert("constexpr"); + Keywords.insert("delete"); + Keywords.insert("do"); + Keywords.insert("else"); + Keywords.insert("final"); + Keywords.insert("for"); + Keywords.insert("if"); + Keywords.insert("mutable"); + Keywords.insert("namespace"); + Keywords.insert("new"); + Keywords.insert("private"); + Keywords.insert("public"); + Keywords.insert("requires"); + Keywords.insert("return"); + Keywords.insert("static"); + Keywords.insert("static_assert"); + Keywords.insert("using"); + Keywords.insert("void"); + Keywords.insert("volatile"); + Keywords.insert("while"); + + // Builtin types we highlight + Keywords.insert("void"); + Keywords.insert("char"); + Keywords.insert("short"); + Keywords.insert("int"); + Keywords.insert("unsigned"); + Keywords.insert("long"); + Keywords.insert("float"); + Keywords.insert("double"); + + Literals.insert("true"); + Literals.insert("false"); + Literals.insert("nullptr"); + + Initialized = true; +} + +static SourceManager createTempSourceManager() { + FileSystemOptions FileOpts; + FileManager FileMgr(FileOpts); + llvm::IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs()); + llvm::IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions()); + DiagnosticsEngine diags(DiagIDs, DiagOpts); + return SourceManager(diags, FileMgr); +} + +static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM, + const LangOptions &LangOpts) { + return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts); +} + +std::vector<StyleRange> +CodeSnippetHighlighter::highlightLine(StringRef SourceLine, + const LangOptions &LangOpts) { + ensureTokenData(); + + constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; + constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; + constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW; + + const auto 
MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine); + SourceManager FakeSM = createTempSourceManager(); + Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts); + L.SetKeepWhitespaceMode(true); + + std::vector<StyleRange> Styles; + bool Stop = false; + while (!Stop) { + Token tok; + Stop = L.LexFromRawLexer(tok); + if (tok.is(tok::unknown)) + continue; + + bool Invalid; + unsigned Start = + FakeSM.getSpellingColumnNumber(tok.getLocation(), &Invalid) - 1; + if (Invalid) + continue; + + if (tok.is(tok::raw_identifier)) { + // Almost everything we lex is an identifier, since we use a raw lexer. + // Some should be highlightes as literals, others as keywords. + if (Keywords.contains(tok.getRawIdentifier())) + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), KeywordColor}); + else if (Literals.contains(tok.getRawIdentifier())) + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), LiteralColor}); + } else if (tok::isLiteral(tok.getKind())) { + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), LiteralColor}); + } else if (tok.is(tok::comment)) { + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), CommentColor}); + } + } + + return Styles; +} diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 779dead5d058d1a..13d7d1e048cf991 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -11,6 +11,7 @@ #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" +#include "clang/Frontend/CodeSnippetHighlighter.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -1278,6 +1279,9 @@ void TextDiagnostic::emitSnippetAndCaret( void TextDiagnostic::emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, unsigned LineNo) { + std::vector<StyleRange> Styles = + SnippetHighlighter.highlightLine(SourceLine, LangOpts); 
+ // Emit line number. if (MaxLineNoDisplayWidth > 0) { unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo); @@ -1287,11 +1291,33 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, // Print the source line one character at a time. bool PrintReversed = false; + bool HighlightingEnabled = DiagOpts->ShowColors; size_t I = 0; while (I < SourceLine.size()) { auto [Str, WasPrintable] = printableTextForNextCharacter(SourceLine, &I, DiagOpts->TabStop); + // Just stop highlighting anything for this line if we found a non-printable + // character. + if (!WasPrintable) + HighlightingEnabled = false; + + // FIXME: I hope we can do this in some nicer way. + if (HighlightingEnabled) { + std::optional<enum raw_ostream::Colors> H; + for (auto &P : Styles) { + if (P.Start < I && P.End >= I) { + H = P.c; + break; + } + } + + if (H) { + OS.changeColor(*H, false); + } else + OS.resetColor(); + } + // Toggle inverted colors on or off for this character. if (DiagOpts->ShowColors) { if (WasPrintable == PrintReversed) { >From dfbf4ae06eccde42cc1aec205021ea41eba6f02b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Wed, 20 Sep 2023 15:28:10 +0200 Subject: [PATCH 02/10] Get identifier table from Preprocessor --- .../clang/Frontend/CodeSnippetHighlighter.h | 11 +- clang/include/clang/Frontend/TextDiagnostic.h | 7 +- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 101 +++++------------- clang/lib/Frontend/TextDiagnostic.cpp | 8 +- clang/lib/Frontend/TextDiagnosticPrinter.cpp | 2 +- 5 files changed, 39 insertions(+), 90 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index 776954b59e2e1a8..ec03375221f9ffc 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -22,6 +22,8 @@ struct StyleRange { const enum llvm::raw_ostream::Colors c; }; +class Preprocessor; + class 
CodeSnippetHighlighter final { public: CodeSnippetHighlighter() = default; @@ -30,15 +32,8 @@ class CodeSnippetHighlighter final { /// The returned vector contains non-overlapping style ranges. They are sorted /// from beginning of the line to the end. std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine, + const Preprocessor *PP, const LangOptions &LangOpts); - -private: - bool Initialized = false; - /// Fills Keywords and Literals. - void ensureTokenData(); - - llvm::SmallSet<StringRef, 12> Keywords; - llvm::SmallSet<StringRef, 12> Literals; }; } // namespace clang diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 59fd4d4f9408d48..8cdb9b141a8a4af 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -19,7 +19,6 @@ #include "clang/Frontend/DiagnosticRenderer.h" namespace clang { - /// Class to encapsulate the logic for formatting and printing a textual /// diagnostic message. /// @@ -34,12 +33,12 @@ namespace clang { /// printing coming out of libclang. 
class TextDiagnostic : public DiagnosticRenderer { raw_ostream &OS; + const Preprocessor *PP; CodeSnippetHighlighter SnippetHighlighter; public: - TextDiagnostic(raw_ostream &OS, - const LangOptions &LangOpts, - DiagnosticOptions *DiagOpts); + TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, + const Preprocessor *PP, DiagnosticOptions *DiagOpts); ~TextDiagnostic() override; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 829a533ad2692e5..63b3707fbb7ef83 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -3,59 +3,12 @@ #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorOptions.h" #include "llvm/Support/raw_ostream.h" using namespace clang; -void CodeSnippetHighlighter::ensureTokenData() { - if (Initialized) - return; - - // List of keywords, literals and types we want to highlight. - // These are best-effort, as is everything we do wrt. highlighting. 
- Keywords.insert("_Static_assert"); - Keywords.insert("auto"); - Keywords.insert("concept"); - Keywords.insert("const"); - Keywords.insert("consteval"); - Keywords.insert("constexpr"); - Keywords.insert("delete"); - Keywords.insert("do"); - Keywords.insert("else"); - Keywords.insert("final"); - Keywords.insert("for"); - Keywords.insert("if"); - Keywords.insert("mutable"); - Keywords.insert("namespace"); - Keywords.insert("new"); - Keywords.insert("private"); - Keywords.insert("public"); - Keywords.insert("requires"); - Keywords.insert("return"); - Keywords.insert("static"); - Keywords.insert("static_assert"); - Keywords.insert("using"); - Keywords.insert("void"); - Keywords.insert("volatile"); - Keywords.insert("while"); - - // Builtin types we highlight - Keywords.insert("void"); - Keywords.insert("char"); - Keywords.insert("short"); - Keywords.insert("int"); - Keywords.insert("unsigned"); - Keywords.insert("long"); - Keywords.insert("float"); - Keywords.insert("double"); - - Literals.insert("true"); - Literals.insert("false"); - Literals.insert("nullptr"); - - Initialized = true; -} - static SourceManager createTempSourceManager() { FileSystemOptions FileOpts; FileManager FileMgr(FileOpts); @@ -70,49 +23,51 @@ static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM, return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts); } -std::vector<StyleRange> -CodeSnippetHighlighter::highlightLine(StringRef SourceLine, - const LangOptions &LangOpts) { - ensureTokenData(); - +std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( + StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) { constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW; - const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine); SourceManager FakeSM = createTempSourceManager(); + const auto MemBuf = 
llvm::MemoryBuffer::getMemBuffer(SourceLine); Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts); L.SetKeepWhitespaceMode(true); std::vector<StyleRange> Styles; bool Stop = false; while (!Stop) { - Token tok; - Stop = L.LexFromRawLexer(tok); - if (tok.is(tok::unknown)) + Token T; + Stop = L.LexFromRawLexer(T); + if (T.is(tok::unknown)) continue; bool Invalid; unsigned Start = - FakeSM.getSpellingColumnNumber(tok.getLocation(), &Invalid) - 1; + FakeSM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; if (Invalid) continue; - if (tok.is(tok::raw_identifier)) { - // Almost everything we lex is an identifier, since we use a raw lexer. - // Some should be highlightes as literals, others as keywords. - if (Keywords.contains(tok.getRawIdentifier())) - Styles.push_back( - StyleRange{Start, Start + tok.getLength(), KeywordColor}); - else if (Literals.contains(tok.getRawIdentifier())) + if (T.is(tok::raw_identifier)) { + StringRef RawIdent = T.getRawIdentifier(); + // Special case true/false/nullptr literals, since they will otherwise be + // treated as keywords. 
+ if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") { Styles.push_back( - StyleRange{Start, Start + tok.getLength(), LiteralColor}); - } else if (tok::isLiteral(tok.getKind())) { - Styles.push_back( - StyleRange{Start, Start + tok.getLength(), LiteralColor}); - } else if (tok.is(tok::comment)) { - Styles.push_back( - StyleRange{Start, Start + tok.getLength(), CommentColor}); + StyleRange{Start, Start + T.getLength(), LiteralColor}); + } else { + const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); + assert(II); + + if (II->isKeyword(LangOpts)) { + Styles.push_back( + StyleRange{Start, Start + T.getLength(), KeywordColor}); + } + } + } else if (tok::isLiteral(T.getKind())) { + Styles.push_back(StyleRange{Start, Start + T.getLength(), LiteralColor}); + } else if (T.is(tok::comment)) { + Styles.push_back(StyleRange{Start, Start + T.getLength(), CommentColor}); } } diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 13d7d1e048cf991..e840cdd952d09f1 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -645,10 +645,10 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns, return Wrapped; } -TextDiagnostic::TextDiagnostic(raw_ostream &OS, - const LangOptions &LangOpts, +TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, + const Preprocessor *PP, DiagnosticOptions *DiagOpts) - : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS) {} + : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {} TextDiagnostic::~TextDiagnostic() {} @@ -1280,7 +1280,7 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, unsigned LineNo) { std::vector<StyleRange> Styles = - SnippetHighlighter.highlightLine(SourceLine, LangOpts); + SnippetHighlighter.highlightLine(SourceLine, PP, LangOpts); // Emit line number. 
if (MaxLineNoDisplayWidth > 0) { diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp index 0ff5376098ffe8d..3bc3935078baada 100644 --- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() { void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO, const Preprocessor *PP) { // Build the TextDiagnostic utility. - TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts)); + TextDiag.reset(new TextDiagnostic(OS, LO, PP, &*DiagOpts)); } void TextDiagnosticPrinter::EndSourceFile() { >From 7468f0a1441216987134a75de453924ac0bf9495 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Wed, 20 Sep 2023 17:24:42 +0200 Subject: [PATCH 03/10] Move the PP parameter to the end of the TextDiagnostic ctor --- clang/include/clang/Frontend/TextDiagnostic.h | 2 +- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 2 ++ clang/lib/Frontend/TextDiagnostic.cpp | 4 ++-- clang/lib/Frontend/TextDiagnosticPrinter.cpp | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 8cdb9b141a8a4af..43c39ff96a2d1ce 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -38,7 +38,7 @@ class TextDiagnostic : public DiagnosticRenderer { public: TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, - const Preprocessor *PP, DiagnosticOptions *DiagOpts); + DiagnosticOptions *DiagOpts, const Preprocessor *PP = nullptr); ~TextDiagnostic() override; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 63b3707fbb7ef83..32bd61f3746023c 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -25,6 +25,8 @@ static Lexer 
createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM, std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) { + if (!PP) + return {}; constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW; diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index e840cdd952d09f1..c9207e9dfbf9217 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -646,8 +646,8 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns, } TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, - const Preprocessor *PP, - DiagnosticOptions *DiagOpts) + DiagnosticOptions *DiagOpts, + const Preprocessor *PP) : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {} TextDiagnostic::~TextDiagnostic() {} diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp index 3bc3935078baada..b2fb762537573ef 100644 --- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() { void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO, const Preprocessor *PP) { // Build the TextDiagnostic utility. 
- TextDiag.reset(new TextDiagnostic(OS, LO, PP, &*DiagOpts)); + TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts, PP)); } void TextDiagnosticPrinter::EndSourceFile() { >From 133187693d0203c8e58f72c7cccf7fce35046f60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Thu, 21 Sep 2023 06:38:24 +0200 Subject: [PATCH 04/10] Tune colors --- clang/include/clang/Frontend/CodeSnippetHighlighter.h | 2 +- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 6 +++--- clang/lib/Frontend/TextDiagnostic.cpp | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index ec03375221f9ffc..c2a0184085d5da4 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -19,7 +19,7 @@ namespace clang { struct StyleRange { unsigned Start; unsigned End; - const enum llvm::raw_ostream::Colors c; + const enum llvm::raw_ostream::Colors color; }; class Preprocessor; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 32bd61f3746023c..dba7f5d2848505a 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -27,9 +27,9 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) { if (!PP) return {}; - constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; - constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; - constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW; + constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN; + constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN; + constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; SourceManager FakeSM = createTempSourceManager(); const auto MemBuf = 
llvm::MemoryBuffer::getMemBuffer(SourceLine); diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index c9207e9dfbf9217..35a92a8044f2e52 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Locale.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <optional> @@ -1307,14 +1308,14 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, std::optional<enum raw_ostream::Colors> H; for (auto &P : Styles) { if (P.Start < I && P.End >= I) { - H = P.c; + H = P.color; break; } } - if (H) { + if (H) OS.changeColor(*H, false); - } else + else OS.resetColor(); } >From b31b5725e7ed0a6947c957ff1b18df124f6c839d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Thu, 21 Sep 2023 11:01:43 +0200 Subject: [PATCH 05/10] Lex the entire file --- .../clang/Frontend/CodeSnippetHighlighter.h | 7 +- clang/include/clang/Frontend/TextDiagnostic.h | 3 +- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 138 ++++++++++++------ clang/lib/Frontend/TextDiagnostic.cpp | 14 +- 4 files changed, 111 insertions(+), 51 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index c2a0184085d5da4..51c14880fb95485 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -23,6 +23,8 @@ struct StyleRange { }; class Preprocessor; +class FileID; +class SourceManager; class CodeSnippetHighlighter final { public: @@ -31,9 +33,10 @@ class CodeSnippetHighlighter final { /// Produce StyleRanges for the given line. /// The returned vector contains non-overlapping style ranges. They are sorted /// from beginning of the line to the end. 
- std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine, + std::vector<StyleRange> highlightLine(unsigned LineNumber, const Preprocessor *PP, - const LangOptions &LangOpts); + const LangOptions &LangOpts, FileID FID, + const SourceManager &SM); }; } // namespace clang diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 43c39ff96a2d1ce..102b33aedd5ef98 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -105,7 +105,8 @@ class TextDiagnostic : public DiagnosticRenderer { ArrayRef<FixItHint> Hints); void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, - unsigned LineNo); + FileID FID, const SourceManager &SM, unsigned LineNo, + unsigned DisplayLineNo); void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM); }; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index dba7f5d2848505a..d319e690a355b2b 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -9,34 +9,47 @@ using namespace clang; -static SourceManager createTempSourceManager() { - FileSystemOptions FileOpts; - FileManager FileMgr(FileOpts); - llvm::IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs()); - llvm::IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions()); - DiagnosticsEngine diags(DiagIDs, DiagOpts); - return SourceManager(diags, FileMgr); -} - -static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM, - const LangOptions &LangOpts) { - return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts); -} +static constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN; +static constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN; +static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; std::vector<StyleRange> 
CodeSnippetHighlighter::highlightLine( - StringRef SourceLine, const Preprocessor *PP, const LangOptions &LangOpts) { + unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts, + FileID FID, const SourceManager &SM) { if (!PP) return {}; - constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN; - constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN; - constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; - SourceManager FakeSM = createTempSourceManager(); - const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine); - Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts); + // Classify the given token and append it to the given vector. + auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec, + const Token &T, unsigned Start, + unsigned Length) -> void { + if (T.is(tok::raw_identifier)) { + StringRef RawIdent = T.getRawIdentifier(); + // Special case true/false/nullptr literals, since they will otherwise be + // treated as keywords. 
+ if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") { + Vec.push_back(StyleRange{Start, Start + Length, LiteralColor}); + } else { + const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); + assert(II); + + if (II->isKeyword(LangOpts)) { + Vec.push_back(StyleRange{Start, Start + Length, KeywordColor}); + } + } + } else if (tok::isLiteral(T.getKind())) { + Vec.push_back(StyleRange{Start, Start + Length, LiteralColor}); + } else if (T.is(tok::comment)) { + Vec.push_back(StyleRange{Start, Start + Length, CommentColor}); + } + }; + + auto Buff = SM.getBufferOrNone(FID); + assert(Buff); + Lexer L = Lexer(FID, *Buff, SM, LangOpts); L.SetKeepWhitespaceMode(true); + std::vector<std::vector<StyleRange>> Lines; - std::vector<StyleRange> Styles; bool Stop = false; while (!Stop) { Token T; @@ -45,33 +58,74 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( continue; bool Invalid; - unsigned Start = - FakeSM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; + unsigned StartCol = + SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; + if (Invalid) + continue; + unsigned StartLine = + SM.getSpellingLineNumber(T.getLocation(), &Invalid) - 1; if (Invalid) continue; - if (T.is(tok::raw_identifier)) { - StringRef RawIdent = T.getRawIdentifier(); - // Special case true/false/nullptr literals, since they will otherwise be - // treated as keywords. - if (RawIdent == "true" || RawIdent == "false" || RawIdent == "nullptr") { - Styles.push_back( - StyleRange{Start, Start + T.getLength(), LiteralColor}); - } else { - const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); - assert(II); + while (Lines.size() <= StartLine) + Lines.push_back({}); - if (II->isKeyword(LangOpts)) { - Styles.push_back( - StyleRange{Start, Start + T.getLength(), KeywordColor}); - } + unsigned EndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1; + if (Invalid) + continue; + + // Simple tokens. 
+ if (StartLine == EndLine) { + appendStyle(Lines[StartLine], T, StartCol, T.getLength()); + continue; + } + unsigned NumLines = EndLine - StartLine; + + // For tokens that span multiple lines (think multiline comments), we + // divide them into multiple StyleRanges. + unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1; + if (Invalid) + continue; + + std::string Spelling = Lexer::getSpelling(T, SM, LangOpts); + + unsigned L = 0; + unsigned LineLength = 0; + for (unsigned I = 0; I <= Spelling.size(); ++I) { + // This line is done. + if (Spelling[I] == '\n' || Spelling[I] == '\r' || I == Spelling.size()) { + while (Lines.size() <= StartLine + L) + Lines.push_back({}); + + if (L == 0) // First line + appendStyle(Lines[StartLine + L], T, StartCol, LineLength); + else if (L == NumLines) // Last line + appendStyle(Lines[StartLine + L], T, 0, EndCol); + else + appendStyle(Lines[StartLine + L], T, 0, LineLength); + ++L; + LineLength = 0; + continue; } - } else if (tok::isLiteral(T.getKind())) { - Styles.push_back(StyleRange{Start, Start + T.getLength(), LiteralColor}); - } else if (T.is(tok::comment)) { - Styles.push_back(StyleRange{Start, Start + T.getLength(), CommentColor}); + ++LineLength; + } + } + +#if 0 + llvm::errs() << "--\nLine Style info: \n"; + int I = 0; + for (std::vector<StyleRange> &Line : Lines) { + llvm::errs() << I << ": "; + for (const auto &R : Line) { + llvm::errs() << "{" << R.Start << ", " << R.End << "}, "; } + llvm::errs() << "\n"; + + ++I; } +#endif - return Styles; + while (Lines.size() <= LineNumber) + Lines.push_back({}); + return Lines[LineNumber]; } diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 35a92a8044f2e52..5aea7b8f4210749 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -1249,7 +1249,8 @@ void TextDiagnostic::emitSnippetAndCaret( } // Emit what we have computed. 
- emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo); + emitSnippet(SourceLine, MaxLineNoDisplayWidth, FID, SM, LineNo, + DisplayLineNo); if (!CaretLine.empty()) { indentForLineNumbers(); @@ -1278,16 +1279,17 @@ void TextDiagnostic::emitSnippetAndCaret( } void TextDiagnostic::emitSnippet(StringRef SourceLine, - unsigned MaxLineNoDisplayWidth, - unsigned LineNo) { + unsigned MaxLineNoDisplayWidth, FileID FID, + const SourceManager &SM, unsigned LineNo, + unsigned DisplayLineNo) { std::vector<StyleRange> Styles = - SnippetHighlighter.highlightLine(SourceLine, PP, LangOpts); + SnippetHighlighter.highlightLine(LineNo - 1, PP, LangOpts, FID, SM); // Emit line number. if (MaxLineNoDisplayWidth > 0) { - unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo); + unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo); OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1) - << LineNo << " | "; + << DisplayLineNo << " | "; } // Print the source line one character at a time. >From 955e2eac2063c328a929039e2e55a60ac2bf23c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 22 Sep 2023 06:48:55 +0200 Subject: [PATCH 06/10] Try to fix PCH test --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index d319e690a355b2b..8905fbfb29b8927 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -19,6 +19,10 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( if (!PP) return {}; + // Might cause emission of another diagnostic. + if (PP->getIdentifierTable().getExternalIdentifierLookup()) + return {}; + // Classify the given token and append it to the given vector. 
auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec, const Token &T, unsigned Start, >From d9e555b19bce34c4d5f9eecebe42a80f0e5442e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Tue, 26 Sep 2023 08:11:58 +0200 Subject: [PATCH 07/10] Measurements --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 8905fbfb29b8927..17614a962ee6a7f 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -6,6 +6,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" #include "llvm/Support/raw_ostream.h" +#include <chrono> using namespace clang; @@ -16,6 +17,9 @@ static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts, FileID FID, const SourceManager &SM) { + std::chrono::steady_clock::time_point begin = + std::chrono::steady_clock::now(); + if (!PP) return {}; @@ -23,6 +27,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( if (PP->getIdentifierTable().getExternalIdentifierLookup()) return {}; + size_t NTokens = 0; // Classify the given token and append it to the given vector. 
auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec, const Token &T, unsigned Start, @@ -56,6 +61,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( bool Stop = false; while (!Stop) { + ++NTokens; Token T; Stop = L.LexFromRawLexer(T); if (T.is(tok::unknown)) @@ -131,5 +137,23 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( while (Lines.size() <= LineNumber) Lines.push_back({}); + + std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); + llvm::errs() << "Lexed " << Lines.size() << " lines and " << NTokens + << " Tokens\n"; + llvm::errs() << "That took " + << std::chrono::duration_cast<std::chrono::microseconds>(end - + begin) + .count() + << " microseconds\n"; + llvm::errs() << "That took " + << std::chrono::duration_cast<std::chrono::milliseconds>(end - + begin) + .count() + << " milliseconds\n"; + llvm::errs() + << "That took " + << std::chrono::duration_cast<std::chrono::seconds>(end - begin).count() + << " seconds\n"; return Lines[LineNumber]; } >From a76184da5948dc03bcde405d1bb72c894e4268c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Wed, 27 Sep 2023 08:05:09 +0200 Subject: [PATCH 08/10] Slightly improve performance by bailing out earlier --- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 17614a962ee6a7f..71e5c30e56d146a 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -41,14 +41,13 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( } else { const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); assert(II); - - if (II->isKeyword(LangOpts)) { + if (II->isKeyword(LangOpts)) Vec.push_back(StyleRange{Start, Start + Length, KeywordColor}); - } } } else if (tok::isLiteral(T.getKind())) { 
Vec.push_back(StyleRange{Start, Start + Length, LiteralColor}); - } else if (T.is(tok::comment)) { + } else { + assert(T.is(tok::comment)); Vec.push_back(StyleRange{Start, Start + Length, CommentColor}); } }; @@ -67,6 +66,11 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( if (T.is(tok::unknown)) continue; + // We are only interested in identifiers, literals and comments. + if (!T.is(tok::raw_identifier) && !T.is(tok::comment) && + !tok::isLiteral(T.getKind())) + continue; + bool Invalid; unsigned StartCol = SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; @@ -138,6 +142,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( while (Lines.size() <= LineNumber) Lines.push_back({}); +#if 0 std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); llvm::errs() << "Lexed " << Lines.size() << " lines and " << NTokens << " Tokens\n"; @@ -155,5 +160,6 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( << "That took " << std::chrono::duration_cast<std::chrono::seconds>(end - begin).count() << " seconds\n"; +#endif return Lines[LineNumber]; } >From 06b0927eaf7883615e621e8b7ed20d95430127ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Wed, 27 Sep 2023 10:45:36 +0200 Subject: [PATCH 09/10] Only care about tokens that touch our LineNumber. 
--- .../clang/Frontend/CodeSnippetHighlighter.h | 2 +- clang/lib/Frontend/CodeSnippetHighlighter.cpp | 19 +++++++++++++------ clang/lib/Frontend/TextDiagnostic.cpp | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index 51c14880fb95485..a65bd3991d4eff2 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -19,7 +19,7 @@ namespace clang { struct StyleRange { unsigned Start; unsigned End; - const enum llvm::raw_ostream::Colors color; + const enum llvm::raw_ostream::Colors Color; }; class Preprocessor; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 71e5c30e56d146a..7663155c6c83923 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -71,23 +71,30 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( !tok::isLiteral(T.getKind())) continue; - bool Invalid; - unsigned StartCol = - SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; + bool Invalid = false; + unsigned EndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1; if (Invalid) continue; + + if (EndLine < LineNumber) + continue; unsigned StartLine = SM.getSpellingLineNumber(T.getLocation(), &Invalid) - 1; if (Invalid) continue; + if (StartLine > LineNumber) + break; - while (Lines.size() <= StartLine) - Lines.push_back({}); + // Must have an intersection at this point + assert(StartLine <= LineNumber && EndLine >= LineNumber); - unsigned EndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid) - 1; + unsigned StartCol = + SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; if (Invalid) continue; + while (Lines.size() <= StartLine) + Lines.push_back({}); // Simple tokens. 
if (StartLine == EndLine) { appendStyle(Lines[StartLine], T, StartCol, T.getLength()); diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 5aea7b8f4210749..f2793d23522f1a5 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -1310,7 +1310,7 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, std::optional<enum raw_ostream::Colors> H; for (auto &P : Styles) { if (P.Start < I && P.End >= I) { - H = P.color; + H = P.Color; break; } } >From f9c0202942f8db4b0935454c23bef880c3776b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 6 Oct 2023 15:28:25 +0200 Subject: [PATCH 10/10] Add checkpoints to Preprocessor --- .../clang/Frontend/CodeSnippetHighlighter.h | 13 ++-- clang/include/clang/Frontend/TextDiagnostic.h | 2 +- clang/include/clang/Lex/Preprocessor.h | 5 ++ clang/lib/Frontend/CodeSnippetHighlighter.cpp | 65 ++++++++++--------- clang/lib/Frontend/TextDiagnostic.cpp | 10 +-- clang/lib/Lex/Preprocessor.cpp | 32 +++++++++ 6 files changed, 86 insertions(+), 41 deletions(-) diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h index a65bd3991d4eff2..451a182b3e35317 100644 --- a/clang/include/clang/Frontend/CodeSnippetHighlighter.h +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -10,16 +10,15 @@ #define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H #include "clang/Basic/LangOptions.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/raw_ostream.h" -#include <vector> namespace clang { struct StyleRange { unsigned Start; unsigned End; - const enum llvm::raw_ostream::Colors Color; + enum llvm::raw_ostream::Colors Color; }; class Preprocessor; @@ -33,10 +32,10 @@ class CodeSnippetHighlighter final { /// Produce StyleRanges for the given line. /// The returned vector contains non-overlapping style ranges. 
They are sorted /// from beginning of the line to the end. - std::vector<StyleRange> highlightLine(unsigned LineNumber, - const Preprocessor *PP, - const LangOptions &LangOpts, FileID FID, - const SourceManager &SM); + llvm::SmallVector<StyleRange> + highlightLine(unsigned LineNumber, const Preprocessor *PP, + const LangOptions &LangOpts, FileID FID, + const SourceManager &SM, const char *LineStart); }; } // namespace clang diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 102b33aedd5ef98..ecd5bb4a4f568dc 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -106,7 +106,7 @@ class TextDiagnostic : public DiagnosticRenderer { void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, FileID FID, const SourceManager &SM, unsigned LineNo, - unsigned DisplayLineNo); + unsigned DisplayLineNo, const char *LineStart); void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM); }; diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index a8d2599d94ebc66..37211d7b4778db0 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -128,6 +128,7 @@ enum MacroUse { class Preprocessor { friend class VAOptDefinitionContext; friend class VariadicMacroScopeGuard; + friend class CodeSnippetHighlighter; llvm::unique_function<void(const clang::Token &)> OnToken; std::shared_ptr<PreprocessorOptions> PPOpts; @@ -141,6 +142,10 @@ class Preprocessor { HeaderSearch &HeaderInfo; ModuleLoader &TheModuleLoader; + llvm::SmallVector<const char *> CheckPoints; + void saveCheckPoint(const char *P); + const char *getSaveFor(const char *S) const; + /// External source of macros. 
ExternalPreprocessorSource *ExternalSource; diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp index 7663155c6c83923..7dfc743f22e85c0 100644 --- a/clang/lib/Frontend/CodeSnippetHighlighter.cpp +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -14,9 +14,9 @@ static constexpr raw_ostream::Colors CommentColor = raw_ostream::GREEN; static constexpr raw_ostream::Colors LiteralColor = raw_ostream::CYAN; static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; -std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( +llvm::SmallVector<StyleRange> CodeSnippetHighlighter::highlightLine( unsigned LineNumber, const Preprocessor *PP, const LangOptions &LangOpts, - FileID FID, const SourceManager &SM) { + FileID FID, const SourceManager &SM, const char *LineStart) { std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); @@ -29,7 +29,7 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( size_t NTokens = 0; // Classify the given token and append it to the given vector. - auto appendStyle = [PP, &LangOpts](std::vector<StyleRange> &Vec, + auto appendStyle = [PP, &LangOpts](llvm::SmallVector<StyleRange> &Vec, const Token &T, unsigned Start, unsigned Length) -> void { if (T.is(tok::raw_identifier)) { @@ -52,11 +52,22 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( } }; + // Figure out where to start lexing from. 
+ unsigned Offset = 0; + const char *Save = PP->getSaveFor(LineStart); + auto Buff = SM.getBufferOrNone(FID); assert(Buff); + + Offset = Save - Buff->getBufferStart(); + assert(Save >= Buff->getBufferStart()); + assert(Save <= Buff->getBufferEnd()); + Lexer L = Lexer(FID, *Buff, SM, LangOpts); L.SetKeepWhitespaceMode(true); - std::vector<std::vector<StyleRange>> Lines; + L.seek(Offset, /*IsAtStartOfLine=*/true); + + llvm::SmallVector<StyleRange> LineRanges; bool Stop = false; while (!Stop) { @@ -93,11 +104,9 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( if (Invalid) continue; - while (Lines.size() <= StartLine) - Lines.push_back({}); // Simple tokens. if (StartLine == EndLine) { - appendStyle(Lines[StartLine], T, StartCol, T.getLength()); + appendStyle(LineRanges, T, StartCol, T.getLength()); continue; } unsigned NumLines = EndLine - StartLine; @@ -115,15 +124,17 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( for (unsigned I = 0; I <= Spelling.size(); ++I) { // This line is done. if (Spelling[I] == '\n' || Spelling[I] == '\r' || I == Spelling.size()) { - while (Lines.size() <= StartLine + L) - Lines.push_back({}); - - if (L == 0) // First line - appendStyle(Lines[StartLine + L], T, StartCol, LineLength); - else if (L == NumLines) // Last line - appendStyle(Lines[StartLine + L], T, 0, EndCol); - else - appendStyle(Lines[StartLine + L], T, 0, LineLength); + if (StartLine + L == LineNumber) { + if (L == 0) // First line + appendStyle(LineRanges, T, StartCol, LineLength); + else if (L == NumLines) // Last line + appendStyle(LineRanges, T, 0, EndCol); + else + appendStyle(LineRanges, T, 0, LineLength); + + // We only do one line, so we're done. 
+ break; + } ++L; LineLength = 0; continue; @@ -134,25 +145,21 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( #if 0 llvm::errs() << "--\nLine Style info: \n"; - int I = 0; - for (std::vector<StyleRange> &Line : Lines) { - llvm::errs() << I << ": "; - for (const auto &R : Line) { + //int I = 0; + //for (std::vector<StyleRange> &Line : Lines) { + //llvm::errs() << I << ": "; + for (const auto &R : LineRanges) { llvm::errs() << "{" << R.Start << ", " << R.End << "}, "; } llvm::errs() << "\n"; - ++I; - } + //++I; + //} #endif - while (Lines.size() <= LineNumber) - Lines.push_back({}); - -#if 0 +#if 1 std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); - llvm::errs() << "Lexed " << Lines.size() << " lines and " << NTokens - << " Tokens\n"; + llvm::errs() << "Lexed " << NTokens << " Tokens\n"; llvm::errs() << "That took " << std::chrono::duration_cast<std::chrono::microseconds>(end - begin) @@ -168,5 +175,5 @@ std::vector<StyleRange> CodeSnippetHighlighter::highlightLine( << std::chrono::duration_cast<std::chrono::seconds>(end - begin).count() << " seconds\n"; #endif - return Lines[LineNumber]; + return LineRanges; } diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index f2793d23522f1a5..cbc0cfacec20f0e 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -13,6 +13,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Frontend/CodeSnippetHighlighter.h" #include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ConvertUTF.h" @@ -1250,7 +1251,7 @@ void TextDiagnostic::emitSnippetAndCaret( // Emit what we have computed. 
emitSnippet(SourceLine, MaxLineNoDisplayWidth, FID, SM, LineNo, - DisplayLineNo); + DisplayLineNo, LineStart); if (!CaretLine.empty()) { indentForLineNumbers(); @@ -1281,9 +1282,10 @@ void TextDiagnostic::emitSnippetAndCaret( void TextDiagnostic::emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, FileID FID, const SourceManager &SM, unsigned LineNo, - unsigned DisplayLineNo) { - std::vector<StyleRange> Styles = - SnippetHighlighter.highlightLine(LineNo - 1, PP, LangOpts, FID, SM); + unsigned DisplayLineNo, + const char *LineStart) { + llvm::SmallVector<StyleRange> Styles = SnippetHighlighter.highlightLine( + LineNo - 1, PP, LangOpts, FID, SM, LineStart); // Emit line number. if (MaxLineNoDisplayWidth > 0) { diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index ede4c51487ffbe7..e40ba5f33272bec 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -546,6 +546,7 @@ void Preprocessor::EnterMainSourceFile() { // information) and predefined macros aren't guaranteed to be set properly. assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); FileID MainFileID = SourceMgr.getMainFileID(); + // llvm::errs() << "##### Main source file: " << (int)MainFileID << "\n"; // If MainFileID is loaded it means we loaded an AST file, no need to enter // a main file. @@ -878,15 +879,46 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { return true; } +void Preprocessor::saveCheckPoint(const char *P) { + static constexpr ptrdiff_t Limit = 1000; + if (CheckPoints.empty()) { + CheckPoints.push_back(P); + return; + } + + const char *Cur = CheckPoints.back(); + if (Cur == P) + return; + if ((P - Cur) > Limit) + CheckPoints.push_back(P); +} + +const char *Preprocessor::getSaveFor(const char *S) const { + const char *C = S; + // FIXME: Use std::lower_bound or something smart. Aaron knows what I'm + // talking about. 
+ for (ssize_t I = CheckPoints.size() - 1; I >= 0; --I) { + C = CheckPoints[I]; + if (CheckPoints[I] <= S) + break; + } + return C; +} + void Preprocessor::Lex(Token &Result) { ++LexLevel; // We loop here until a lex function returns a token; this avoids recursion. bool ReturnedToken; + // const char *Save = nullptr; do { switch (CurLexerKind) { case CLK_Lexer: ReturnedToken = CurLexer->Lex(Result); + if (ReturnedToken && CurLexer && + CurLexer->getFileID() == SourceMgr.getMainFileID()) { + saveCheckPoint(CurLexer->BufferPtr); + } break; case CLK_TokenLexer: ReturnedToken = CurTokenLexer->Lex(Result); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits