llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang <details> <summary>Changes</summary> Add some primitive syntax highlighting to our code snippet output. Before:  After:  _Obviously_ this is kinda WIP and more of a hack in general, but IMO it increases readability of the source snippets (which people look at highlighted all the time anyway...) and LLDB does something similar, so let's see. -- Full diff: https://github.com/llvm/llvm-project/pull/66514.diff 5 Files Affected: - (added) clang/include/clang/Frontend/CodeSnippetHighlighter.h (+46) - (modified) clang/include/clang/Frontend/TextDiagnostic.h (+3-1) - (modified) clang/lib/Frontend/CMakeLists.txt (+1) - (added) clang/lib/Frontend/CodeSnippetHighlighter.cpp (+120) - (modified) clang/lib/Frontend/TextDiagnostic.cpp (+29-2) <pre> diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h new file mode 100644 index 000000000000000..776954b59e2e1a8 --- /dev/null +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -0,0 +1,46 @@ +//===--- CodeSnippetHighlighter.h - Code snippet highlighting ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H +#define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H + +#include &quot;clang/Basic/LangOptions.h&quot; +#include &quot;llvm/ADT/SmallSet.h&quot; +#include &quot;llvm/Support/raw_ostream.h&quot; +#include &lt;vector&gt; + +namespace clang { + +struct StyleRange { + unsigned Start; + unsigned End; + const enum llvm::raw_ostream::Colors c; +}; + +class CodeSnippetHighlighter final { +public: + CodeSnippetHighlighter() = default; + + /// Produce StyleRanges for the given line. 
+ /// The returned vector contains non-overlapping style ranges. They are sorted + /// from beginning of the line to the end. + std::vector&lt;StyleRange&gt; highlightLine(llvm::StringRef SourceLine, + const LangOptions &amp;LangOpts); + +private: + bool Initialized = false; + /// Fills Keywords and Literals. + void ensureTokenData(); + + llvm::SmallSet&lt;StringRef, 12&gt; Keywords; + llvm::SmallSet&lt;StringRef, 12&gt; Literals; +}; + +} // namespace clang + +#endif diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 7eb0ab0cdc9bca8..39e09fe553dd4b9 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -15,6 +15,7 @@ #ifndef LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H #define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H +#include &quot;clang/Frontend/CodeSnippetHighlighter.h&quot; #include &quot;clang/Frontend/DiagnosticRenderer.h&quot; namespace clang { @@ -33,6 +34,7 @@ namespace clang { /// printing coming out of libclang. 
class TextDiagnostic : public DiagnosticRenderer { raw_ostream &amp;OS; + CodeSnippetHighlighter SnippetHighlighter; public: TextDiagnostic(raw_ostream &amp;OS, @@ -104,7 +106,7 @@ class TextDiagnostic : public DiagnosticRenderer { ArrayRef&lt;FixItHint&gt; Hints); void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, - unsigned LineNo); + unsigned LineNo, bool A, const SourceManager &amp;SM); void emitParseableFixits(ArrayRef&lt;FixItHint&gt; Hints, const SourceManager &amp;SM); }; diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt index 1e5f0a859dfd568..f3547f771593093 100644 --- a/clang/lib/Frontend/CMakeLists.txt +++ b/clang/lib/Frontend/CMakeLists.txt @@ -42,6 +42,7 @@ add_clang_library(clangFrontend TextDiagnosticPrinter.cpp VerifyDiagnosticConsumer.cpp InterfaceStubFunctionsConsumer.cpp + CodeSnippetHighlighter.cpp DEPENDS ClangDriverOptions diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp new file mode 100644 index 000000000000000..829a533ad2692e5 --- /dev/null +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -0,0 +1,120 @@ + +#include &quot;clang/Frontend/CodeSnippetHighlighter.h&quot; +#include &quot;clang/Basic/DiagnosticOptions.h&quot; +#include &quot;clang/Basic/SourceManager.h&quot; +#include &quot;clang/Lex/Lexer.h&quot; +#include &quot;llvm/Support/raw_ostream.h&quot; + +using namespace clang; + +void CodeSnippetHighlighter::ensureTokenData() { + if (Initialized) + return; + + // List of keywords, literals and types we want to highlight. + // These are best-effort, as is everything we do wrt. highlighting. 
+ Keywords.insert(&quot;_Static_assert&quot;); + Keywords.insert(&quot;auto&quot;); + Keywords.insert(&quot;concept&quot;); + Keywords.insert(&quot;const&quot;); + Keywords.insert(&quot;consteval&quot;); + Keywords.insert(&quot;constexpr&quot;); + Keywords.insert(&quot;delete&quot;); + Keywords.insert(&quot;do&quot;); + Keywords.insert(&quot;else&quot;); + Keywords.insert(&quot;final&quot;); + Keywords.insert(&quot;for&quot;); + Keywords.insert(&quot;if&quot;); + Keywords.insert(&quot;mutable&quot;); + Keywords.insert(&quot;namespace&quot;); + Keywords.insert(&quot;new&quot;); + Keywords.insert(&quot;private&quot;); + Keywords.insert(&quot;public&quot;); + Keywords.insert(&quot;requires&quot;); + Keywords.insert(&quot;return&quot;); + Keywords.insert(&quot;static&quot;); + Keywords.insert(&quot;static_assert&quot;); + Keywords.insert(&quot;using&quot;); + Keywords.insert(&quot;void&quot;); + Keywords.insert(&quot;volatile&quot;); + Keywords.insert(&quot;while&quot;); + + // Builtin types we highlight + Keywords.insert(&quot;void&quot;); + Keywords.insert(&quot;char&quot;); + Keywords.insert(&quot;short&quot;); + Keywords.insert(&quot;int&quot;); + Keywords.insert(&quot;unsigned&quot;); + Keywords.insert(&quot;long&quot;); + Keywords.insert(&quot;float&quot;); + Keywords.insert(&quot;double&quot;); + + Literals.insert(&quot;true&quot;); + Literals.insert(&quot;false&quot;); + Literals.insert(&quot;nullptr&quot;); + + Initialized = true; +} + +static SourceManager createTempSourceManager() { + FileSystemOptions FileOpts; + FileManager FileMgr(FileOpts); + llvm::IntrusiveRefCntPtr&lt;DiagnosticIDs&gt; DiagIDs(new DiagnosticIDs()); + llvm::IntrusiveRefCntPtr&lt;DiagnosticOptions&gt; DiagOpts(new DiagnosticOptions()); + DiagnosticsEngine diags(DiagIDs, DiagOpts); + return SourceManager(diags, FileMgr); +} + +static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &amp;FakeSM, + const LangOptions &amp;LangOpts) { + return Lexer(FakeSM.createFileID(B), B, 
FakeSM, LangOpts); +} + +std::vector&lt;StyleRange&gt; +CodeSnippetHighlighter::highlightLine(StringRef SourceLine, + const LangOptions &amp;LangOpts) { + ensureTokenData(); + + constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; + constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; + constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW; + + const auto MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine); + SourceManager FakeSM = createTempSourceManager(); + Lexer L = createTempLexer(MemBuf-&gt;getMemBufferRef(), FakeSM, LangOpts); + L.SetKeepWhitespaceMode(true); + + std::vector&lt;StyleRange&gt; Styles; + bool Stop = false; + while (!Stop) { + Token tok; + Stop = L.LexFromRawLexer(tok); + if (tok.is(tok::unknown)) + continue; + + bool Invalid; + unsigned Start = + FakeSM.getSpellingColumnNumber(tok.getLocation(), &amp;Invalid) - 1; + if (Invalid) + continue; + + if (tok.is(tok::raw_identifier)) { + // Almost everything we lex is an identifier, since we use a raw lexer. + // Some should be highlighted as literals, others as keywords. 
+ if (Keywords.contains(tok.getRawIdentifier())) + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), KeywordColor}); + else if (Literals.contains(tok.getRawIdentifier())) + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), LiteralColor}); + } else if (tok::isLiteral(tok.getKind())) { + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), LiteralColor}); + } else if (tok.is(tok::comment)) { + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), CommentColor}); + } + } + + return Styles; +} diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index eaa6e8d29a1dece..a7a2405d9bbae6d 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -11,6 +11,7 @@ #include &quot;clang/Basic/DiagnosticOptions.h&quot; #include &quot;clang/Basic/FileManager.h&quot; #include &quot;clang/Basic/SourceManager.h&quot; +#include &quot;clang/Frontend/CodeSnippetHighlighter.h&quot; #include &quot;clang/Lex/Lexer.h&quot; #include &quot;llvm/ADT/SmallString.h&quot; #include &quot;llvm/ADT/StringExtras.h&quot; @@ -1248,7 +1249,7 @@ void TextDiagnostic::emitSnippetAndCaret( } // Emit what we have computed. - emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo); + emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo, false, SM); if (!CaretLine.empty()) { indentForLineNumbers(); @@ -1278,7 +1279,11 @@ void TextDiagnostic::emitSnippetAndCaret( void TextDiagnostic::emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, - unsigned LineNo) { + unsigned LineNo, bool IsSlash, + const SourceManager &amp;SM) { + std::vector&lt;StyleRange&gt; Styles = + SnippetHighlighter.highlightLine(SourceLine, LangOpts); + // Emit line number. if (MaxLineNoDisplayWidth &gt; 0) { unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo); @@ -1288,11 +1293,33 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, // Print the source line one character at a time. 
bool PrintReversed = false; + bool HighlightingEnabled = DiagOpts-&gt;ShowColors; size_t I = 0; while (I &lt; SourceLine.size()) { auto [Str, WasPrintable] = printableTextForNextCharacter(SourceLine, &amp;I, DiagOpts-&gt;TabStop); + // Just stop highlighting anything for this line if we found a non-printable + // character. + if (!WasPrintable) + HighlightingEnabled = false; + + // FIXME: I hope we can do this in some nicer way. + if (HighlightingEnabled) { + std::optional&lt;enum raw_ostream::Colors&gt; H; + for (auto &amp;P : Styles) { + if (P.Start &lt; I &amp;&amp; P.End &gt;= I) { + H = P.c; + break; + } + } + + if (H) { + OS.changeColor(*H, false); + } else + OS.resetColor(); + } + // Toggle inverted colors on or off for this character. if (DiagOpts-&gt;ShowColors) { if (WasPrintable == PrintReversed) { </pre> </details> https://github.com/llvm/llvm-project/pull/66514 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits