https://github.com/tbaederr created https://github.com/llvm/llvm-project/pull/66514
Add some primitive syntax highlighting to our code snippet output. Before: [image not preserved in this archive] After: [image not preserved in this archive] _Obviously_ this is kinda WIP and more of a hack in general, but IMO it increases readability of the source snippets (which people look at highlighted all the time anyway...) and LLDB does something similar, so let's see. From b4f4d29c8780752629f78b4debb86bd9f9dff0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 15 Sep 2023 15:51:39 +0200 Subject: [PATCH] [clang][Diagnostics] Highlight code snippets Add some primitive syntax highlighting to our code snippet output. --- .../clang/Frontend/CodeSnippetHighlighter.h | 46 +++++++ clang/include/clang/Frontend/TextDiagnostic.h | 4 +- clang/lib/Frontend/CMakeLists.txt | 1 + clang/lib/Frontend/CodeSnippetHighlighter.cpp | 120 ++++++++++++++++++ clang/lib/Frontend/TextDiagnostic.cpp | 31 ++++- 5 files changed, 199 insertions(+), 3 deletions(-) create mode 100644 clang/include/clang/Frontend/CodeSnippetHighlighter.h create mode 100644 clang/lib/Frontend/CodeSnippetHighlighter.cpp diff --git a/clang/include/clang/Frontend/CodeSnippetHighlighter.h b/clang/include/clang/Frontend/CodeSnippetHighlighter.h new file mode 100644 index 000000000000000..776954b59e2e1a8 --- /dev/null +++ b/clang/include/clang/Frontend/CodeSnippetHighlighter.h @@ -0,0 +1,46 @@ +//===--- CodeSnippetHighlighter.h - Code snippet highlighting ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H +#define LLVM_CLANG_FRONTEND_CODESNIPPETHIGHLIGHTER_H + +#include "clang/Basic/LangOptions.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/raw_ostream.h" +#include <vector> + +namespace clang { + +struct StyleRange { + unsigned Start; + unsigned End; + const enum llvm::raw_ostream::Colors c; +}; + +class CodeSnippetHighlighter final { +public: + CodeSnippetHighlighter() = default; + + /// Produce StyleRanges for the given line. + /// The returned vector contains non-overlapping style ranges. They are sorted + /// from beginning of the line to the end. + std::vector<StyleRange> highlightLine(llvm::StringRef SourceLine, + const LangOptions &LangOpts); + +private: + bool Initialized = false; + /// Fills Keywords and Literals. + void ensureTokenData(); + + llvm::SmallSet<StringRef, 12> Keywords; + llvm::SmallSet<StringRef, 12> Literals; +}; + +} // namespace clang + +#endif diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 7eb0ab0cdc9bca8..39e09fe553dd4b9 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -15,6 +15,7 @@ #ifndef LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H #define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H +#include "clang/Frontend/CodeSnippetHighlighter.h" #include "clang/Frontend/DiagnosticRenderer.h" namespace clang { @@ -33,6 +34,7 @@ namespace clang { /// printing coming out of libclang. 
class TextDiagnostic : public DiagnosticRenderer { raw_ostream &OS; + CodeSnippetHighlighter SnippetHighlighter; public: TextDiagnostic(raw_ostream &OS, @@ -104,7 +106,7 @@ class TextDiagnostic : public DiagnosticRenderer { ArrayRef<FixItHint> Hints); void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, - unsigned LineNo); + unsigned LineNo, bool A, const SourceManager &SM); void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM); }; diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt index 1e5f0a859dfd568..f3547f771593093 100644 --- a/clang/lib/Frontend/CMakeLists.txt +++ b/clang/lib/Frontend/CMakeLists.txt @@ -42,6 +42,7 @@ add_clang_library(clangFrontend TextDiagnosticPrinter.cpp VerifyDiagnosticConsumer.cpp InterfaceStubFunctionsConsumer.cpp + CodeSnippetHighlighter.cpp DEPENDS ClangDriverOptions diff --git a/clang/lib/Frontend/CodeSnippetHighlighter.cpp b/clang/lib/Frontend/CodeSnippetHighlighter.cpp new file mode 100644 index 000000000000000..829a533ad2692e5 --- /dev/null +++ b/clang/lib/Frontend/CodeSnippetHighlighter.cpp @@ -0,0 +1,120 @@ + +#include "clang/Frontend/CodeSnippetHighlighter.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; + +void CodeSnippetHighlighter::ensureTokenData() { + if (Initialized) + return; + + // List of keywords, literals and types we want to highlight. + // These are best-effort, as is everything we do wrt. highlighting. 
+ Keywords.insert("_Static_assert"); + Keywords.insert("auto"); + Keywords.insert("concept"); + Keywords.insert("const"); + Keywords.insert("consteval"); + Keywords.insert("constexpr"); + Keywords.insert("delete"); + Keywords.insert("do"); + Keywords.insert("else"); + Keywords.insert("final"); + Keywords.insert("for"); + Keywords.insert("if"); + Keywords.insert("mutable"); + Keywords.insert("namespace"); + Keywords.insert("new"); + Keywords.insert("private"); + Keywords.insert("public"); + Keywords.insert("requires"); + Keywords.insert("return"); + Keywords.insert("static"); + Keywords.insert("static_assert"); + Keywords.insert("using"); + Keywords.insert("void"); + Keywords.insert("volatile"); + Keywords.insert("while"); + + // Builtin types we highlight + Keywords.insert("void"); + Keywords.insert("char"); + Keywords.insert("short"); + Keywords.insert("int"); + Keywords.insert("unsigned"); + Keywords.insert("long"); + Keywords.insert("float"); + Keywords.insert("double"); + + Literals.insert("true"); + Literals.insert("false"); + Literals.insert("nullptr"); + + Initialized = true; +} + +static SourceManager createTempSourceManager() { + FileSystemOptions FileOpts; + FileManager FileMgr(FileOpts); + llvm::IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs()); + llvm::IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions()); + DiagnosticsEngine diags(DiagIDs, DiagOpts); + return SourceManager(diags, FileMgr); +} + +static Lexer createTempLexer(llvm::MemoryBufferRef B, SourceManager &FakeSM, + const LangOptions &LangOpts) { + return Lexer(FakeSM.createFileID(B), B, FakeSM, LangOpts); +} + +std::vector<StyleRange> +CodeSnippetHighlighter::highlightLine(StringRef SourceLine, + const LangOptions &LangOpts) { + ensureTokenData(); + + constexpr raw_ostream::Colors CommentColor = raw_ostream::BLACK; + constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; + constexpr raw_ostream::Colors KeywordColor = raw_ostream::YELLOW; + + const auto 
MemBuf = llvm::MemoryBuffer::getMemBuffer(SourceLine); + SourceManager FakeSM = createTempSourceManager(); + Lexer L = createTempLexer(MemBuf->getMemBufferRef(), FakeSM, LangOpts); + L.SetKeepWhitespaceMode(true); + + std::vector<StyleRange> Styles; + bool Stop = false; + while (!Stop) { + Token tok; + Stop = L.LexFromRawLexer(tok); + if (tok.is(tok::unknown)) + continue; + + bool Invalid; + unsigned Start = + FakeSM.getSpellingColumnNumber(tok.getLocation(), &Invalid) - 1; + if (Invalid) + continue; + + if (tok.is(tok::raw_identifier)) { + // Almost everything we lex is an identifier, since we use a raw lexer. + // Some should be highlightes as literals, others as keywords. + if (Keywords.contains(tok.getRawIdentifier())) + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), KeywordColor}); + else if (Literals.contains(tok.getRawIdentifier())) + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), LiteralColor}); + } else if (tok::isLiteral(tok.getKind())) { + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), LiteralColor}); + } else if (tok.is(tok::comment)) { + Styles.push_back( + StyleRange{Start, Start + tok.getLength(), CommentColor}); + } + } + + return Styles; +} diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index eaa6e8d29a1dece..a7a2405d9bbae6d 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -11,6 +11,7 @@ #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" +#include "clang/Frontend/CodeSnippetHighlighter.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -1248,7 +1249,7 @@ void TextDiagnostic::emitSnippetAndCaret( } // Emit what we have computed. 
- emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo); + emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo, false, SM); if (!CaretLine.empty()) { indentForLineNumbers(); @@ -1278,7 +1279,11 @@ void TextDiagnostic::emitSnippetAndCaret( void TextDiagnostic::emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, - unsigned LineNo) { + unsigned LineNo, bool IsSlash, + const SourceManager &SM) { + std::vector<StyleRange> Styles = + SnippetHighlighter.highlightLine(SourceLine, LangOpts); + // Emit line number. if (MaxLineNoDisplayWidth > 0) { unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo); @@ -1288,11 +1293,33 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, // Print the source line one character at a time. bool PrintReversed = false; + bool HighlightingEnabled = DiagOpts->ShowColors; size_t I = 0; while (I < SourceLine.size()) { auto [Str, WasPrintable] = printableTextForNextCharacter(SourceLine, &I, DiagOpts->TabStop); + // Just stop highlighting anything for this line if we found a non-printable + // character. + if (!WasPrintable) + HighlightingEnabled = false; + + // FIXME: I hope we can do this in some nicer way. + if (HighlightingEnabled) { + std::optional<enum raw_ostream::Colors> H; + for (auto &P : Styles) { + if (P.Start < I && P.End >= I) { + H = P.c; + break; + } + } + + if (H) { + OS.changeColor(*H, false); + } else + OS.resetColor(); + } + // Toggle inverted colors on or off for this character. if (DiagOpts->ShowColors) { if (WasPrintable == PrintReversed) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits