[PATCH] D131632: [WIP] Enable SARIF Diagnostics

Abraham Corea Diaz via Phabricator via cfe-commits Wed, 10 Aug 2022 16:44:52 -0700

abrahamcd created this revision.
Herald added a subscriber: mgorny.
Herald added a project: All.
abrahamcd requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.


Work in progress to enable Clang to emit SARIF diagnostics.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D131632

Files:
  clang/include/clang/Frontend/SARIFDiagnostic.h
  clang/include/clang/Frontend/SARIFDiagnosticPrinter.h
  clang/lib/Frontend/CMakeLists.txt
  clang/lib/Frontend/CompilerInstance.cpp
  clang/lib/Frontend/FrontendAction.cpp
  clang/lib/Frontend/SARIFDiagnostic.cpp
  clang/lib/Frontend/SARIFDiagnosticPrinter.cpp
  clang/unittests/Frontend/CMakeLists.txt
  clang/unittests/Frontend/SARIFDiagnosticTest.cpp
  clang/unittests/Frontend/sarif-diagnostics.cpp

Index: clang/unittests/Frontend/sarif-diagnostics.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Frontend/sarif-diagnostics.cpp
@@ -0,0 +1,138 @@
+// RUN: %clang -fdiagnostics-format=sarif %s -o %t.exe -DGTEST
+// RUN: %clang -fsyntax-only -Wall -Wextra -fdiagnostics-format=sarif %s 2> %t.diags || true
+// RUN: %t.exe < %t.diags
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Program.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <filesystem>
+#include <vector>
+
+namespace {
+
+/// Deliberately ill-formed C++ source text. The test below passes it to
+/// `echo` so that it can be piped into clang to provoke diagnostics.
+constexpr llvm::StringRef BrokenProgram =
+    R"cpp(// Example errors below start on line 2
+void main() {
+  int i = hello;
+
+  float test = 1a.0;
+
+  if (true)
+    bool Yes = true;
+    return;
+
+  bool j = hi;
+}
+})cpp";
+
+/// Runs clang with -fdiagnostics-format=sarif on BrokenProgram (supplied via
+/// stdin) and checks a few fields of the SARIF document written to stderr.
+TEST(SARIFDiagnosticTest, TestFields) {
+  // Look for clang in the bin/ directory three levels above the current
+  // working directory.
+  llvm::SmallString<256> SearchDir;
+  ASSERT_FALSE(llvm::sys::fs::current_path(SearchDir));
+  SearchDir.append("/../../../bin");
+  llvm::ErrorOr<std::string> ClangPathOrErr =
+      llvm::sys::findProgramByName("clang", {SearchDir});
+  ASSERT_TRUE(ClangPathOrErr);
+  const std::string &ClangPath = *ClangPathOrErr;
+
+  llvm::ErrorOr<std::string> EchoPathOrErr =
+      llvm::sys::findProgramByName("echo");
+  ASSERT_TRUE(EchoPathOrErr);
+  const std::string &EchoPath = *EchoPathOrErr;
+
+  // Run `echo` with its stdout redirected to a temporary file so that the
+  // broken program ends up on disk, ready to be used as clang's stdin. The
+  // path-only createTemporaryFile overload is used for both files so the test
+  // never receives a file descriptor it would have to close.
+  llvm::SmallString<32> EchoInputFile, EchoOutputFile;
+  ASSERT_FALSE(
+      llvm::sys::fs::createTemporaryFile("echo-input", "", EchoInputFile));
+  llvm::FileRemover InputRemover(EchoInputFile.c_str());
+  ASSERT_FALSE(
+      llvm::sys::fs::createTemporaryFile("echo-output", "", EchoOutputFile));
+  llvm::FileRemover OutputRemover(EchoOutputFile.c_str());
+
+  // Redirect order: stdin, stdout, stderr.
+  llvm::Optional<llvm::StringRef> Redirects[] = {
+      EchoInputFile.str(), EchoOutputFile.str(), llvm::StringRef("")};
+
+  int RunResult = llvm::sys::ExecuteAndWait(EchoPath, {"echo", BrokenProgram},
+                                            llvm::None, Redirects);
+  ASSERT_EQ(RunResult, 0);
+
+  // Run clang on the echoed program and capture stderr, which is where the
+  // diagnostics are read from below.
+  llvm::SmallString<32> ClangErrFile;
+  ASSERT_FALSE(llvm::sys::fs::createTemporaryFile("clang-err", "", ClangErrFile));
+  llvm::FileRemover ClangErrRemover(ClangErrFile.c_str());
+
+  llvm::Optional<llvm::StringRef> ClangRedirects[] = {
+      EchoOutputFile.str(), llvm::StringRef(""), ClangErrFile.str()};
+  llvm::StringRef Args[] = {"clang",
+                            "-xc++",
+                            "-",
+                            "-fsyntax-only",
+                            "-Wall",
+                            "-Wextra",
+                            "-fdiagnostics-format=sarif"};
+
+  int ClangResult =
+      llvm::sys::ExecuteAndWait(ClangPath, Args, llvm::None, ClangRedirects);
+  // The program is ill-formed, so clang is expected to exit with status 1.
+  ASSERT_EQ(ClangResult, 1);
+
+  auto ClangErrBuf = llvm::MemoryBuffer::getFile(ClangErrFile.c_str());
+  ASSERT_TRUE(ClangErrBuf);
+  llvm::StringRef ClangErr = ClangErrBuf.get()->getBuffer();
+
+  // The captured stderr must be a single JSON document.
+  llvm::Expected<llvm::json::Value> Value = llvm::json::parse(ClangErr);
+  ASSERT_TRUE(static_cast<bool>(Value)) << llvm::toString(Value.takeError());
+
+  const llvm::json::Object *SarifDoc = Value->getAsObject();
+  ASSERT_NE(SarifDoc, nullptr);
+
+  const llvm::json::Array *Runs = SarifDoc->getArray("runs");
+  ASSERT_NE(Runs, nullptr);
+  ASSERT_FALSE(Runs->empty());
+  const llvm::json::Object *TheRun = Runs->back().getAsObject();
+  ASSERT_NE(TheRun, nullptr);
+
+  // Check Artifacts
+  const llvm::json::Array *Artifacts = TheRun->getArray("artifacts");
+  ASSERT_NE(Artifacts, nullptr);
+  ASSERT_FALSE(Artifacts->empty());
+  const llvm::json::Object *TheArtifact = Artifacts->back().getAsObject();
+  ASSERT_NE(TheArtifact, nullptr);
+  const llvm::json::Object *Location = TheArtifact->getObject("location");
+  ASSERT_NE(Location, nullptr);
+
+  llvm::Optional<int64_t> Index = Location->getInteger("index");
+  llvm::Optional<llvm::StringRef> Uri = Location->getString("uri");
+  ASSERT_TRUE(Index.hasValue());
+  ASSERT_TRUE(Uri.hasValue());
+
+  EXPECT_EQ(Index.getValue(), 0);
+  EXPECT_EQ(Uri.getValue(), "file://<stdin>");
+
+  // Check Driver
+  const llvm::json::Object *Tool = TheRun->getObject("tool");
+  ASSERT_NE(Tool, nullptr);
+  const llvm::json::Object *Driver = Tool->getObject("driver");
+  ASSERT_NE(Driver, nullptr);
+
+  llvm::Optional<llvm::StringRef> Name = Driver->getString("name");
+  llvm::Optional<llvm::StringRef> FullName = Driver->getString("fullName");
+  ASSERT_TRUE(Name.hasValue());
+  ASSERT_TRUE(FullName.hasValue());
+
+  EXPECT_EQ(Name.getValue(), "clang");
+  // NOTE(review): the expected full name hard-codes a major version and will
+  // presumably need updating whenever that version changes.
+  EXPECT_EQ(FullName.getValue(), "clang-15");
+
+  // TODO: check the "results" array of the run and the "rules" array of the
+  // driver; neither is verified yet.
+}
+
+} // namespace
Index: clang/unittests/Frontend/SARIFDiagnosticTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Frontend/SARIFDiagnosticTest.cpp
@@ -0,0 +1,100 @@
+// //===- unittests/Frontend/SARIFDiagnosticTest.cpp - ------------------------===//
+// //
+// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// // See https://llvm.org/LICENSE.txt for license information.
+// // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// //
+// //===----------------------------------------------------------------------===//
+
+// #include "clang/Frontend/SARIFDiagnostic.h"
+// #include "clang/Basic/FileManager.h"
+// #include "clang/Basic/LangOptions.h"
+// #include "clang/Basic/SourceManager.h"
+// #include "llvm/Support/SmallVectorMemoryBuffer.h"
+// #include "gtest/gtest.h"
+
+// using namespace llvm;
+// using namespace clang;
+
+// namespace {
+
+// /// Prints a diagnostic with the given DiagnosticOptions and the given
+// /// SourceLocation and returns the printed diagnostic text.
+// static std::string PrintDiag(const DiagnosticOptions &Opts, FullSourceLoc Loc) {
+//   std::string Out;
+//   llvm::raw_string_ostream OS(Out);
+//   clang::LangOptions LangOpts;
+//   // Owned by SARIFDiagnostic.
+//   DiagnosticOptions *DiagOpts = new DiagnosticOptions(Opts);
+//   SARIFDiagnostic Diag(OS, LangOpts, DiagOpts);
+//   // Emit a dummy diagnostic that is just 'message'.
+//   Diag.emitDiagnostic(Loc, DiagnosticsEngine::Level::Warning, "message",
+//                       /*Ranges=*/{}, /*FixItHints=*/{});
+//   OS.flush();
+//   return Out;
+// }
+
+// TEST(SARIFDiagnostic, ShowLine) {
+//   // Create dummy FileManager and SourceManager.
+//   FileSystemOptions FSOpts;
+//   FileManager FileMgr(FSOpts);
+//   IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs);
+//   DiagnosticsEngine DiagEngine(DiagID, new DiagnosticOptions,
+//                                new IgnoringDiagConsumer());
+//   SourceManager SrcMgr(DiagEngine, FileMgr);
+
+//   // Create a dummy file with some contents to produce a test SourceLocation.
+//   const llvm::StringRef file_path = "main.cpp";
+//   const llvm::StringRef main_file_contents = "some\nsource\ncode\n";
+//   const clang::FileEntryRef fe = FileMgr.getVirtualFileRef(
+//       file_path,
+//       /*Size=*/static_cast<off_t>(main_file_contents.size()),
+//       /*ModificationTime=*/0);
+
+//   llvm::SmallVector<char, 64> buffer;
+//   buffer.append(main_file_contents.begin(), main_file_contents.end());
+//   auto file_contents = std::make_unique<llvm::SmallVectorMemoryBuffer>(
+//       std::move(buffer), file_path, /*RequiresNullTerminator=*/false);
+//   SrcMgr.overrideFileContents(fe, std::move(file_contents));
+
+//   // Create the actual file id and use it as the main file.
+//   clang::FileID fid =
+//       SrcMgr.createFileID(fe, SourceLocation(), clang::SrcMgr::C_User);
+//   SrcMgr.setMainFileID(fid);
+
+//   // Create the source location for the test diagnostic.
+//   FullSourceLoc Loc(SrcMgr.translateLineCol(fid, /*Line=*/1, /*Col=*/2),
+//                     SrcMgr);
+
+//   DiagnosticOptions DiagOpts;
+//   DiagOpts.ShowLine = true;
+//   DiagOpts.ShowColumn = true;
+//   // Hide printing the source line/caret to make the diagnostic shorter and it's
+//   // not relevant for this test.
+//   DiagOpts.ShowCarets = false;
+//   EXPECT_EQ("main.cpp:1:2: warning: message\n", PrintDiag(DiagOpts, Loc));
+
+//   // Check that ShowLine doesn't influence the Vi/MSVC diagnostic formats as its
+//   // a Clang-specific diagnostic option.
+//   DiagOpts.setFormat(TextDiagnosticFormat::Vi);
+//   DiagOpts.ShowLine = false;
+//   EXPECT_EQ("main.cpp +1:2: warning: message\n", PrintDiag(DiagOpts, Loc));
+
+//   DiagOpts.setFormat(TextDiagnosticFormat::MSVC);
+//   DiagOpts.ShowLine = false;
+//   EXPECT_EQ("main.cpp(1,2): warning: message\n", PrintDiag(DiagOpts, Loc));
+
+//   // Reset back to the Clang format.
+//   DiagOpts.setFormat(TextDiagnosticFormat::Clang);
+
+//   // Hide line number but show column.
+//   DiagOpts.ShowLine = false;
+//   EXPECT_EQ("main.cpp:2: warning: message\n", PrintDiag(DiagOpts, Loc));
+
+//   // Show line number but hide column.
+//   DiagOpts.ShowLine = true;
+//   DiagOpts.ShowColumn = false;
+//   EXPECT_EQ("main.cpp:1: warning: message\n", PrintDiag(DiagOpts, Loc));
+// }
+
+// } // anonymous namespace
Index: clang/unittests/Frontend/CMakeLists.txt
===================================================================
--- clang/unittests/Frontend/CMakeLists.txt
+++ clang/unittests/Frontend/CMakeLists.txt
@@ -12,6 +12,8 @@
   ParsedSourceLocationTest.cpp
   PCHPreambleTest.cpp
   OutputStreamTest.cpp
+  sarif-diagnostics.cpp
+  SARIFDiagnosticTest.cpp
   TextDiagnosticTest.cpp
   UtilsTest.cpp
   )
Index: clang/lib/Frontend/SARIFDiagnosticPrinter.cpp
===================================================================
--- /dev/null
+++ clang/lib/Frontend/SARIFDiagnosticPrinter.cpp
@@ -0,0 +1,179 @@
+//===--- SARIFDiagnosticPrinter.cpp - Diagnostic Printer ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This diagnostic client writes the diagnostics it receives as SARIF output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Frontend/SARIFDiagnosticPrinter.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/Sarif.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/SARIFDiagnostic.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/JSON.h"
+#include <algorithm>
+using namespace clang;
+
+/// Records the output stream, the diagnostic options and whether this
+/// printer owns (and must eventually delete) the stream.
+SARIFDiagnosticPrinter::SARIFDiagnosticPrinter(raw_ostream &os,
+                                               DiagnosticOptions *diags,
+                                               bool _OwnsOutputStream)
+    : OS(os), DiagOpts(diags), OwnsOutputStream(_OwnsOutputStream) {}
+
+/// Deletes the output stream, but only when this printer was told it owns it.
+SARIFDiagnosticPrinter::~SARIFDiagnosticPrinter() {
+  if (!OwnsOutputStream)
+    return;
+  delete &OS;
+}
+
+/// Creates the per-file SARIFDiagnostic helper and calls createRun on the
+/// SARIF writer. A writer must already have been set.
+void SARIFDiagnosticPrinter::BeginSourceFile(const LangOptions &LO,
+                                             const Preprocessor *PP) {
+  assert(hasSarifWriter() && "Writer not set!");
+
+  // The helper is handed this printer's stream, options and writer.
+  SARIFDiag.reset(new SARIFDiagnostic(OS, LO, &*DiagOpts, &*Writer));
+
+  // Begin the SARIF run; Prefix is forwarded as the second argument.
+  Writer->createRun("clang", Prefix);
+}
+/// Formats \p Doc with llvm::formatv("{0}", ...) and returns the resulting
+/// text. The object is moved into a json::Value for formatting.
+static std::string serializeSarifDocument(llvm::json::Object &&Doc) {
+  std::string Buffer;
+  llvm::raw_string_ostream Stream(Buffer);
+  llvm::json::Value Document(std::move(Doc));
+  Stream << llvm::formatv("{0}", Document);
+  Stream.flush();
+  return Buffer;
+}
+
+/// Ends the current SARIF run, writes the document produced by the writer
+/// to the output stream as JSON, and releases the per-file SARIFDiagnostic
+/// helper.
+void SARIFDiagnosticPrinter::EndSourceFile() {
+  Writer->endRun();
+  // The call result initializes the JSON value directly; wrapping it in
+  // std::move adds nothing.
+  llvm::json::Value Document(Writer->createDocument());
+  OS << Document;
+  OS.flush();
+  SARIFDiag.reset();
+}
+
+/// Append the bracketed option/category suffix for a diagnostic to \p OS.
+///
+/// Depending on \p DiagOpts this writes the warning flag associated with the
+/// diagnostic (preceded by a "-Werror" entry when a warning was promoted to
+/// an error) and/or its category. Entries are comma separated and enclosed
+/// as " [...]"; nothing is written when there are no entries.
+///
+/// NOTE(review): this information may be worth recording in the SARIF output
+/// rather than printing it as text.
+static void printDiagnosticOptions(raw_ostream &OS,
+                                   DiagnosticsEngine::Level Level,
+                                   const Diagnostic &Info,
+                                   const DiagnosticOptions &DiagOpts) {
+  const unsigned DiagID = Info.getID();
+  bool BracketOpen = false;
+
+  // Writes " [" ahead of the first entry and "," ahead of each later one.
+  auto BeginEntry = [&OS, &BracketOpen] {
+    OS << (BracketOpen ? "," : " [");
+    BracketOpen = true;
+  };
+
+  if (DiagOpts.ShowOptionNames) {
+    // The too-many-errors diagnostic gets a fixed suffix and nothing else.
+    if (DiagID == diag::fatal_too_many_errors) {
+      OS << " [-ferror-limit=]";
+      return;
+    }
+
+    // What follows reconstructs, after the fact, what the diagnostic engine
+    // decided; it is fragile for that reason. A pragma may also have remapped
+    // the diagnostic, which cannot be distinguished here.
+    //
+    // An error-level diagnostic that is a builtin warning/extension and does
+    // not default to an error is taken to have been promoted by the user.
+    const bool PromotedWarning =
+        Level == DiagnosticsEngine::Error &&
+        DiagnosticIDs::isBuiltinWarningOrExtension(DiagID) &&
+        !DiagnosticIDs::isDefaultMappingAsError(DiagID);
+    if (PromotedWarning) {
+      BeginEntry();
+      OS << "-Werror";
+    }
+
+    const StringRef Flag = DiagnosticIDs::getWarningOptionForDiag(DiagID);
+    if (!Flag.empty()) {
+      BeginEntry();
+      OS << (Level == DiagnosticsEngine::Remark ? "-R" : "-W") << Flag;
+      const StringRef FlagValue = Info.getDiags()->getFlagValue();
+      if (!FlagValue.empty())
+        OS << "=" << FlagValue;
+    }
+  }
+
+  // Category information is appended on request: mode 1 prints the number,
+  // mode 2 the name.
+  if (DiagOpts.ShowCategories) {
+    if (const unsigned Category =
+            DiagnosticIDs::getCategoryNumberForDiag(DiagID)) {
+      BeginEntry();
+      if (DiagOpts.ShowCategories != 1) {
+        assert(DiagOpts.ShowCategories == 2 && "Invalid ShowCategories value");
+        OS << DiagnosticIDs::getCategoryNameFromID(Category);
+      } else {
+        OS << Category;
+      }
+    }
+  }
+
+  if (BracketOpen)
+    OS << ']';
+}
+
+/// Handle one diagnostic: update the base-class counters, format the message
+/// and either print it through the simplified location-less path or forward
+/// it to the SARIFDiagnostic helper.
+void SARIFDiagnosticPrinter::HandleDiagnostic(DiagnosticsEngine::Level Level,
+                                              const Diagnostic &Info) {
+  // Default implementation (warnings/errors count); keeps track of the number
+  // of errors.
+  DiagnosticConsumer::HandleDiagnostic(Level, Info);
+
+  // Both paths below read the diagnostic options, so check them up front.
+  assert(DiagOpts && "Unexpected diagnostic without options set");
+
+  // Render the diagnostic message into a temporary buffer eagerly. It is
+  // used below when the diagnostic is emitted.
+  SmallString<100> OutStr;
+  Info.FormatDiagnostic(OutStr);
+
+  llvm::raw_svector_ostream DiagMessageStream(OutStr);
+
+  // Keeps track of the starting position of the location
+  // information (e.g., "foo.c:10:4:") that precedes the error
+  // message. We use this information to determine how long the
+  // file+line+column number prefix is.
+  uint64_t StartOfLocationInfo = OS.tell();
+
+  if (!Prefix.empty())
+    OS << Prefix << ": ";
+
+  // Use a dedicated, simpler path for diagnostics without a valid location.
+  // This is important as if the location is missing, we may be emitting
+  // diagnostics in a context that lacks language options, a source manager, or
+  // other infrastructure necessary when emitting more rich diagnostics.
+  if (!Info.getLocation().isValid()) {
+    SARIFDiagnostic::printDiagnosticLevel(OS, Level, DiagOpts->ShowColors);
+    SARIFDiagnostic::printDiagnosticMessage(
+        OS, /*IsSupplemental=*/Level == DiagnosticsEngine::Note,
+        DiagMessageStream.str(), OS.tell() - StartOfLocationInfo,
+        DiagOpts->MessageLength, DiagOpts->ShowColors);
+    OS.flush();
+    return;
+  }
+
+  // Assert that the rest of our infrastructure is setup properly.
+  assert(Info.hasSourceManager() &&
+         "Unexpected diagnostic with no source manager");
+  assert(SARIFDiag && "Unexpected diagnostic outside source file processing");
+
+  // The raw numeric diagnostic ID is intentionally not written to OS: the
+  // same stream receives the JSON document in EndSourceFile().
+  SARIFDiag->emitDiagnostic(
+      FullSourceLoc(Info.getLocation(), Info.getSourceManager()), Level,
+      DiagMessageStream.str(), Info.getRanges(), Info.getFixItHints());
+
+  OS.flush();
+}
Index: clang/lib/Frontend/SARIFDiagnostic.cpp
===================================================================
--- /dev/null
+++ clang/lib/Frontend/SARIFDiagnostic.cpp
@@ -0,0 +1,1374 @@
+//===--- SARIFDiagnostic.cpp - Text Diagnostic Pretty-Printing ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Frontend/SARIFDiagnostic.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/Sarif.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Locale.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace clang;
+
+// Named color constants used by the printing code in this file.
+static const raw_ostream::Colors noteColor = raw_ostream::BLACK;
+static const raw_ostream::Colors remarkColor = raw_ostream::BLUE;
+static const raw_ostream::Colors fixitColor = raw_ostream::GREEN;
+static const raw_ostream::Colors caretColor = raw_ostream::GREEN;
+static const raw_ostream::Colors warningColor = raw_ostream::MAGENTA;
+static const raw_ostream::Colors templateColor = raw_ostream::CYAN;
+static const raw_ostream::Colors errorColor = raw_ostream::RED;
+static const raw_ostream::Colors fatalColor = raw_ostream::RED;
+// Used for changing only the bold attribute.
+static const raw_ostream::Colors savedColor = raw_ostream::SAVEDCOLOR;
+
+/// Write \p Str to \p OS, switching highlighting at every ToggleHighlight
+/// marker found in the string.
+///
+/// \p Normal says whether output is currently un-highlighted; it is flipped
+/// at each marker, so it carries the state across calls. When highlighting
+/// is switched off and \p Bold is set, the bold attribute is re-applied after
+/// the color reset.
+static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
+                                      bool &Normal, bool Bold) {
+  size_t MarkerPos;
+  while ((MarkerPos = Str.find(ToggleHighlight)) != StringRef::npos) {
+    // Text ahead of the marker is written using the current state.
+    OS << Str.slice(0, MarkerPos);
+    Str = Str.substr(MarkerPos + 1);
+
+    if (!Normal) {
+      OS.resetColor();
+      if (Bold)
+        OS.changeColor(savedColor, true);
+    } else {
+      OS.changeColor(templateColor, true);
+    }
+    Normal = !Normal;
+  }
+
+  // No marker left: write whatever remains.
+  OS << Str;
+}
+
+/// Number of spaces to indent when word-wrapping.
+constexpr unsigned WordWrapIndentation = 6;
+
+/// Count the bytes between index \p i (exclusive) and the closest preceding
+/// tab character, or the start of \p SourceLine when no tab precedes \p i.
+static int bytesSincePreviousTabOrLineBegin(StringRef SourceLine, size_t i) {
+  size_t Cursor = i;
+  while (Cursor > 0 && SourceLine[Cursor - 1] != '\t')
+    --Cursor;
+  return static_cast<int>(i - Cursor);
+}
+
+/// Produce a printable rendering of the character starting at byte \p *i of
+/// \p SourceLine and advance \p *i past that character.
+///
+/// Three cases are distinguished:
+///  - a tab is rendered as the spaces needed to reach the next tab stop and
+///    is reported as printable;
+///  - a valid UTF-8 sequence is returned unchanged (with 'true') when it is
+///    printable, and as a "<U+XXXX>" escape (with 'false') otherwise;
+///  - a byte that does not begin a valid UTF-8 sequence is returned as a
+///    "<XX>" hex escape (with 'false').
+///
+/// \note The index is updated so that it can be passed to a subsequent call
+///       of printableTextForNextCharacter.
+///
+/// \param SourceLine The line of source
+/// \param i Pointer to byte index,
+/// \param TabStop used to expand tabs
+/// \return pair(printable text, 'true' iff original text was printable)
+///
+static std::pair<SmallString<16>, bool>
+printableTextForNextCharacter(StringRef SourceLine, size_t *i,
+                              unsigned TabStop) {
+  assert(i && "i must not be null");
+  assert(*i < SourceLine.size() && "must point to a valid index");
+
+  const size_t Start = *i;
+
+  if (SourceLine[Start] == '\t') {
+    assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop &&
+           "Invalid -ftabstop value");
+    const unsigned Column = bytesSincePreviousTabOrLineBegin(SourceLine, Start);
+    const unsigned NumSpaces = TabStop - Column % TabStop;
+    assert(0 < NumSpaces && NumSpaces <= TabStop &&
+           "Invalid computation of space amt");
+    *i = Start + 1;
+
+    SmallString<16> Spaces;
+    Spaces.assign(NumSpaces, ' ');
+    return std::make_pair(Spaces, true);
+  }
+
+  const unsigned char *Cursor =
+      reinterpret_cast<const unsigned char *>(&*(SourceLine.begin() + Start));
+  const unsigned char *const LineEnd = Cursor + (SourceLine.size() - Start);
+
+  if (!llvm::isLegalUTF8Sequence(Cursor, LineEnd)) {
+    // Not valid UTF-8 (and therefore not printable): escape the single byte.
+    const unsigned char Byte = SourceLine[Start];
+    SmallString<16> EscapedByte("<XX>");
+    EscapedByte[1] = llvm::hexdigit(Byte / 16);
+    EscapedByte[2] = llvm::hexdigit(Byte % 16);
+    *i = Start + 1;
+    return std::make_pair(EscapedByte, false);
+  }
+
+  // Decode exactly one code point; Cursor is advanced by the conversion.
+  const unsigned char *const SequenceBegin = Cursor;
+  const unsigned char *const SequenceEnd =
+      Cursor + llvm::getNumBytesForUTF8(SourceLine[Start]);
+  llvm::UTF32 CodePoint;
+  llvm::UTF32 *Target = &CodePoint;
+
+  llvm::ConversionResult Result = llvm::ConvertUTF8toUTF32(
+      &Cursor, SequenceEnd, &Target, Target + 1, llvm::strictConversion);
+  (void)Result;
+  assert(llvm::conversionOK == Result);
+  assert(0 < Cursor - SequenceBegin &&
+         "we must be further along in the string now");
+  *i = Start + (Cursor - SequenceBegin);
+
+  // Valid UTF-8 and printable: hand the original bytes back.
+  if (llvm::sys::locale::isPrint(CodePoint))
+    return std::make_pair(SmallString<16>(SequenceBegin, SequenceEnd), true);
+
+  // Valid UTF-8 but not printable: build "<U+...>" by inserting hex digits
+  // right after the "U+", least significant first, then pad with zeros
+  // until the escape is eight characters long.
+  SmallString<16> EscapedCP("<U+>");
+  for (; CodePoint; CodePoint /= 16)
+    EscapedCP.insert(EscapedCP.begin() + 3, llvm::hexdigit(CodePoint % 16));
+  while (EscapedCP.size() < 8)
+    EscapedCP.insert(EscapedCP.begin() + 3, llvm::hexdigit(0));
+  return std::make_pair(EscapedCP, false);
+}
+
+/// Replace every tab in \p SourceLine, in place, with the text that
+/// printableTextForNextCharacter returns for it. The line is walked from its
+/// last byte to its first.
+static void expandTabs(std::string &SourceLine, unsigned TabStop) {
+  for (size_t Pos = SourceLine.size(); Pos-- > 0;) {
+    if (SourceLine[Pos] != '\t')
+      continue;
+
+    // The callee advances the index it is given, so pass a scratch copy.
+    size_t Scratch = Pos;
+    auto Expansion = printableTextForNextCharacter(SourceLine, &Scratch, TabStop);
+    SourceLine.replace(Pos, 1, Expansion.first.c_str());
+  }
+}
+
+/// Build, for a raw source line, the table that maps each byte of the line
+///  to the display column at which its printable representation starts
+///  (the first column is numbered 0).
+///
+/// A byte that occupies several columns (e.g. a tab) is mapped to the first
+///  of those columns, so the next entry is larger by more than one.
+///
+/// When several bytes together form one displayed entity, only the first of
+///  them receives a column; the following bytes are mapped to -1.
+///
+/// The final element does not belong to any input byte: it holds the number
+///  of columns needed to display the whole line.
+///
+/// example: (given a tabstop of 8)
+///
+///    "a \t \u3042" -> {0,1,2,8,9,-1,-1,11}
+///
+///  (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
+///   display)
+static void byteToColumn(StringRef SourceLine, unsigned TabStop,
+                         SmallVectorImpl<int> &out) {
+  const size_t NumBytes = SourceLine.size();
+
+  // An empty line yields just the trailing total, which is zero.
+  if (NumBytes == 0) {
+    out.assign(1u, 0);
+    return;
+  }
+
+  // Every entry starts out as "not the start of a column".
+  out.assign(NumBytes + 1, -1);
+
+  int Column = 0;
+  for (size_t Byte = 0; Byte < NumBytes;) {
+    out[Byte] = Column;
+    // The helper advances Byte to the start of the next character.
+    Column += llvm::sys::locale::columnWidth(
+        printableTextForNextCharacter(SourceLine, &Byte, TabStop).first);
+  }
+  out.back() = Column;
+}
+
+/// Build, for a raw source line, the table that maps each display column to
+///  the byte of the line whose printable representation starts at that
+///  column. This is the inverse of the table produced by byteToColumn().
+///
+/// Columns at which no character starts are mapped to -1. The final element
+///  is the number of bytes in the source string.
+///
+/// example: (given a tabstop of 8)
+///
+///    "a \t \u3042" -> {0,1,2,-1,-1,-1,-1,-1,3,4,-1,7}
+///
+///  (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
+///   display)
+static void columnToByte(StringRef SourceLine, unsigned TabStop,
+                         SmallVectorImpl<int> &out) {
+  out.clear();
+
+  if (SourceLine.empty()) {
+    out.resize(1u, 0);
+    return;
+  }
+
+  int Column = 0;
+  size_t Byte = 0;
+
+  // Grows the table up to the current column (padding with -1) and stores
+  // the current byte index in its last slot.
+  auto RecordColumnStart = [&] {
+    out.resize(Column + 1, -1);
+    out.back() = Byte;
+  };
+
+  while (Byte < SourceLine.size()) {
+    RecordColumnStart();
+    // The helper advances Byte to the start of the next character.
+    Column += llvm::sys::locale::columnWidth(
+        printableTextForNextCharacter(SourceLine, &Byte, TabStop).first);
+  }
+
+  // Trailing entry: one past the last column maps to the byte count.
+  RecordColumnStart();
+}
+
+namespace {
+/// Keeps a copy of one source line together with the byte->column and
+/// column->byte tables computed for it, and offers lookups in both
+/// directions.
+struct SourceColumnMap {
+  SourceColumnMap(StringRef SourceLine, unsigned TabStop)
+      : m_SourceLine(SourceLine) {
+    ::byteToColumn(SourceLine, TabStop, m_byteToColumn);
+    ::columnToByte(SourceLine, TabStop, m_columnToByte);
+
+    // The two tables must describe the same line consistently.
+    assert(m_byteToColumn.size() == SourceLine.size() + 1);
+    assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size());
+    assert(m_byteToColumn.size() ==
+           static_cast<unsigned>(m_columnToByte.back() + 1));
+    assert(static_cast<unsigned>(m_byteToColumn.back() + 1) ==
+           m_columnToByte.size());
+  }
+
+  /// Number of display columns of the line.
+  int columns() const { return m_byteToColumn.back(); }
+  /// Number of bytes of the line.
+  int bytes() const { return m_columnToByte.back(); }
+
+  /// Map a byte to the column which it is at the start of, or return -1
+  /// if it is not at the start of a column (for a UTF-8 trailing byte).
+  int byteToColumn(int n) const {
+    assert(0 <= n && n < static_cast<int>(m_byteToColumn.size()));
+    return m_byteToColumn[n];
+  }
+
+  /// Map a byte to the first column which contains it.
+  int byteToContainingColumn(int N) const {
+    assert(0 <= N && N < static_cast<int>(m_byteToColumn.size()));
+    // Walk back to the byte that starts the enclosing column.
+    int Column = m_byteToColumn[N];
+    while (Column == -1)
+      Column = m_byteToColumn[--N];
+    return Column;
+  }
+
+  /// Map a column to the byte which starts the column, or return -1 if
+  /// the column is the second or subsequent column of an expanded tab or
+  /// similar multi-column entity.
+  int columnToByte(int n) const {
+    assert(0 <= n && n < static_cast<int>(m_columnToByte.size()));
+    return m_columnToByte[n];
+  }
+
+  /// Map from a byte index to the next byte which starts a column.
+  int startOfNextColumn(int N) const {
+    assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1));
+    do {
+      ++N;
+    } while (byteToColumn(N) == -1);
+    return N;
+  }
+
+  /// Map from a byte index to the previous byte which starts a column.
+  int startOfPreviousColumn(int N) const {
+    assert(0 < N && N < static_cast<int>(m_byteToColumn.size()));
+    do {
+      --N;
+    } while (byteToColumn(N) == -1);
+    return N;
+  }
+
+  /// The source line this map was built for.
+  StringRef getSourceLine() const { return m_SourceLine; }
+
+private:
+  const std::string m_SourceLine;
+  SmallVector<int, 200> m_byteToColumn;
+  SmallVector<int, 200> m_columnToByte;
+};
+} // end anonymous namespace
+
+/// When the source code line we want to print is too long for
+/// the terminal, select the "interesting" region.
+static void selectInterestingSourceRegion(std::string &SourceLine,
+                                          std::string &CaretLine,
+                                          std::string &FixItInsertionLine,
+                                          unsigned Columns,
+                                          const SourceColumnMap &map) {
+  unsigned CaretColumns = CaretLine.size();
+  unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine);
+  unsigned MaxColumns = std::max(static_cast<unsigned>(map.columns()),
+                                 std::max(CaretColumns, FixItColumns));
+  // if the number of columns is less than the desired number we're done
+  if (MaxColumns <= Columns)
+    return;
+
+  // No special characters are allowed in CaretLine.
+  assert(CaretLine.end() ==
+         llvm::find_if(CaretLine, [](char c) { return c < ' ' || '~' < c; }));
+
+  // Find the slice that we need to display the full caret line
+  // correctly.
+  unsigned CaretStart = 0, CaretEnd = CaretLine.size();
+  for (; CaretStart != CaretEnd; ++CaretStart)
+    if (!isWhitespace(CaretLine[CaretStart]))
+      break;
+
+  for (; CaretEnd != CaretStart; --CaretEnd)
+    if (!isWhitespace(CaretLine[CaretEnd - 1]))
+      break;
+
+  // caret has already been inserted into CaretLine so the above whitespace
+  // check is guaranteed to include the caret
+
+  // If we have a fix-it line, make sure the slice includes all of the
+  // fix-it information.
+  if (!FixItInsertionLine.empty()) {
+    unsigned FixItStart = 0, FixItEnd = FixItInsertionLine.size();
+    for (; FixItStart != FixItEnd; ++FixItStart)
+      if (!isWhitespace(FixItInsertionLine[FixItStart]))
+        break;
+
+    for (; FixItEnd != FixItStart; --FixItEnd)
+      if (!isWhitespace(FixItInsertionLine[FixItEnd - 1]))
+        break;
+
+    // We can safely use the byte offset FixItStart as the column offset
+    // because the characters up until FixItStart are all ASCII whitespace
+    // characters.
+    unsigned FixItStartCol = FixItStart;
+    unsigned FixItEndCol =
+        llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd));
+
+    CaretStart = std::min(FixItStartCol, CaretStart);
+    CaretEnd = std::max(FixItEndCol, CaretEnd);
+  }
+
+  // CaretEnd may have been set at the middle of a character
+  // If it's not at a character's first column then advance it past the current
+  //   character.
+  while (static_cast<int>(CaretEnd) < map.columns() &&
+         -1 == map.columnToByte(CaretEnd))
+    ++CaretEnd;
+
+  assert((static_cast<int>(CaretStart) > map.columns() ||
+          -1 != map.columnToByte(CaretStart)) &&
+         "CaretStart must not point to a column in the middle of a source"
+         " line character");
+  assert((static_cast<int>(CaretEnd) > map.columns() ||
+          -1 != map.columnToByte(CaretEnd)) &&
+         "CaretEnd must not point to a column in the middle of a source line"
+         " character");
+
+  // CaretLine[CaretStart, CaretEnd) contains all of the interesting
+  // parts of the caret line. While this slice is smaller than the
+  // number of columns we have, try to grow the slice to encompass
+  // more context.
+
+  unsigned SourceStart =
+      map.columnToByte(std::min<unsigned>(CaretStart, map.columns()));
+  unsigned SourceEnd =
+      map.columnToByte(std::min<unsigned>(CaretEnd, map.columns()));
+
+  unsigned CaretColumnsOutsideSource =
+      CaretEnd - CaretStart -
+      (map.byteToColumn(SourceEnd) - map.byteToColumn(SourceStart));
+
+  char const *front_ellipse = "  ...";
+  char const *front_space = "     ";
+  char const *back_ellipse = "...";
+  unsigned ellipses_space = strlen(front_ellipse) + strlen(back_ellipse);
+
+  unsigned TargetColumns = Columns;
+  // Give us extra room for the ellipses
+  //  and any of the caret line that extends past the source
+  if (TargetColumns > ellipses_space + CaretColumnsOutsideSource)
+    TargetColumns -= ellipses_space + CaretColumnsOutsideSource;
+
+  while (SourceStart > 0 || SourceEnd < SourceLine.size()) {
+    bool ExpandedRegion = false;
+
+    if (SourceStart > 0) {
+      unsigned NewStart = map.startOfPreviousColumn(SourceStart);
+
+      // Skip over any whitespace we see here; we're looking for
+      // another bit of interesting text.
+      // FIXME: Detect non-ASCII whitespace characters too.
+      while (NewStart && isWhitespace(SourceLine[NewStart]))
+        NewStart = map.startOfPreviousColumn(NewStart);
+
+      // Skip over this bit of "interesting" text.
+      while (NewStart) {
+        unsigned Prev = map.startOfPreviousColumn(NewStart);
+        if (isWhitespace(SourceLine[Prev]))
+          break;
+        NewStart = Prev;
+      }
+
+      assert(map.byteToColumn(NewStart) != -1);
+      unsigned NewColumns =
+          map.byteToColumn(SourceEnd) - map.byteToColumn(NewStart);
+      if (NewColumns <= TargetColumns) {
+        SourceStart = NewStart;
+        ExpandedRegion = true;
+      }
+    }
+
+    if (SourceEnd < SourceLine.size()) {
+      unsigned NewEnd = map.startOfNextColumn(SourceEnd);
+
+      // Skip over any whitespace we see here; we're looking for
+      // another bit of interesting text.
+      // FIXME: Detect non-ASCII whitespace characters too.
+      while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd]))
+        NewEnd = map.startOfNextColumn(NewEnd);
+
+      // Skip over this bit of "interesting" text.
+      while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd]))
+        NewEnd = map.startOfNextColumn(NewEnd);
+
+      assert(map.byteToColumn(NewEnd) != -1);
+      unsigned NewColumns =
+          map.byteToColumn(NewEnd) - map.byteToColumn(SourceStart);
+      if (NewColumns <= TargetColumns) {
+        SourceEnd = NewEnd;
+        ExpandedRegion = true;
+      }
+    }
+
+    if (!ExpandedRegion)
+      break;
+  }
+
+  CaretStart = map.byteToColumn(SourceStart);
+  CaretEnd = map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource;
+
+  // [CaretStart, CaretEnd) is the slice we want. Update the various
+  // output lines to show only this slice, with two-space padding
+  // before the lines so that it looks nicer.
+
+  assert(CaretStart != (unsigned)-1 && CaretEnd != (unsigned)-1 &&
+         SourceStart != (unsigned)-1 && SourceEnd != (unsigned)-1);
+  assert(SourceStart <= SourceEnd);
+  assert(CaretStart <= CaretEnd);
+
+  unsigned BackColumnsRemoved =
+      map.byteToColumn(SourceLine.size()) - map.byteToColumn(SourceEnd);
+  unsigned FrontColumnsRemoved = CaretStart;
+  unsigned ColumnsKept = CaretEnd - CaretStart;
+
+  // We checked up front that the line needed truncation
+  assert(FrontColumnsRemoved + ColumnsKept + BackColumnsRemoved > Columns);
+
+  // The line needs some truncation, and we'd prefer to keep the front
+  //  if possible, so remove the back
+  if (BackColumnsRemoved > strlen(back_ellipse))
+    SourceLine.replace(SourceEnd, std::string::npos, back_ellipse);
+
+  // If that's enough then we're done
+  if (FrontColumnsRemoved + ColumnsKept <= Columns)
+    return;
+
+  // Otherwise remove the front as well
+  if (FrontColumnsRemoved > strlen(front_ellipse)) {
+    SourceLine.replace(0, SourceStart, front_ellipse);
+    CaretLine.replace(0, CaretStart, front_space);
+    if (!FixItInsertionLine.empty())
+      FixItInsertionLine.replace(0, CaretStart, front_space);
+  }
+}
+
+/// Advance past a run of whitespace in \p Str.
+///
+/// Scanning begins at \p Idx and never examines characters at or beyond
+/// \p Length. If \p Idx is already at or past \p Length it is returned
+/// unchanged.
+///
+/// \returns the position of the first character at or after \p Idx that is
+/// not whitespace, or \p Length when only whitespace was found before it.
+static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length) {
+  for (; Idx < Length; ++Idx)
+    if (!isWhitespace(Str[Idx]))
+      break;
+  return Idx;
+}
+
+/// Map an opening punctuation character to the character that closes it.
+///
+/// Recognized openers are the single quote, the backtick (which is closed by
+/// a single quote), the double quote, and the opening parenthesis, square
+/// bracket and curly brace.
+///
+/// \returns the closing character for \p c, or the NUL character when \p c
+/// does not open any balanced punctuation.
+static inline char findMatchingPunctuation(char c) {
+  // Both quote-like openers are terminated by an ordinary single quote.
+  if (c == '\'' || c == '`')
+    return '\'';
+  if (c == '"')
+    return '"';
+  if (c == '(')
+    return ')';
+  if (c == '[')
+    return ']';
+  if (c == '{')
+    return '}';
+
+  // Not an opener.
+  return '\0';
+}
+
+/// Locate the end of the "word" that begins at \p Start in \p Str.
+///
+/// A plain word extends to the next whitespace character. A word that begins
+/// with opening punctuation (see findMatchingPunctuation) extends through the
+/// balanced closing punctuation and any non-whitespace that follows it,
+/// provided the result either fits in the space left on the line
+/// (\p Column + length <= \p Columns) or is shorter than a third of
+/// \p Columns. Otherwise the opening character is dropped and the search
+/// restarts one character (and one column) further on.
+///
+/// \param Length the scan limit; characters at or beyond it are not examined.
+/// \returns the index one past the last character of the word.
+static unsigned findEndOfWord(unsigned Start, StringRef Str, unsigned Length,
+                              unsigned Column, unsigned Columns) {
+  // Every iteration treats Str[Start] as the first character of a candidate
+  // word; the loop only repeats when an over-long punctuated sequence has to
+  // be entered one character deeper.
+  for (;; ++Start, ++Column) {
+    assert(Start < Str.size() && "Invalid start position!");
+    unsigned Pos = Start + 1;
+
+    // A word starting on the very last character is just that character.
+    if (Pos == Str.size())
+      return Pos;
+
+    const char Closer = findMatchingPunctuation(Str[Start]);
+    if (Closer == 0) {
+      // Ordinary word: it runs up to the next whitespace character.
+      while (Pos < Length && !isWhitespace(Str[Pos]))
+        ++Pos;
+      return Pos;
+    }
+
+    // Opening punctuation: walk forward while tracking which closing
+    // characters are still owed, innermost last.
+    SmallString<16> PendingClosers;
+    PendingClosers.push_back(Closer);
+    for (; Pos < Length && !PendingClosers.empty(); ++Pos) {
+      const char Cur = Str[Pos];
+      if (Cur == PendingClosers.back())
+        PendingClosers.pop_back();
+      else if (char Nested = findMatchingPunctuation(Cur))
+        PendingClosers.push_back(Nested);
+    }
+
+    // Include whatever non-whitespace trails the punctuated sequence.
+    while (Pos < Length && !isWhitespace(Str[Pos]))
+      ++Pos;
+
+    const unsigned SpanLength = Pos - Start;
+    // The span still fits on the current line ...
+    const bool FitsOnLine = Column + SpanLength <= Columns;
+    // ... or is short enough to occupy the next line without leaving too
+    // much ugly white space.
+    const bool ShortEnough = SpanLength < Columns / 3;
+    if (FitsOnLine || ShortEnough)
+      return Pos; // Take the whole thing as a single "word".
+
+    // Too long to print as one "word": retry with the word that starts just
+    // after the opening punctuation.
+  }
+}
+
+/// Print the given string to a stream, word-wrapping it to
+/// some number of columns in the process.
+///
+/// Only the text up to the first newline in \p Str is word-wrapped; anything
+/// from that newline onwards is emitted afterwards without re-wrapping.
+///
+/// \param OS the stream to which the word-wrapping string will be
+/// emitted.
+/// \param Str the string to word-wrap and output.
+/// \param Columns the number of columns to word-wrap to.
+/// \param Column the column number at which the first character of \p
+/// Str will be printed. This will be non-zero when part of the first
+/// line has already been printed.
+/// \param Bold if the current text should be bold
+/// \param Indentation the number of spaces to indent any lines beyond
+/// the first line.
+/// \returns true if word-wrapping was required, or false if the
+/// string fit on the first line.
+static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns,
+                             unsigned Column = 0, bool Bold = false,
+                             unsigned Indentation = WordWrapIndentation) {
+  // Wrapping stops at the first newline (or the end of the string).
+  const unsigned Length = std::min(Str.find('\n'), Str.size());
+  bool TextNormal = true;
+
+  bool Wrapped = false;
+  for (unsigned WordStart = 0, WordEnd; WordStart < Length;
+       WordStart = WordEnd) {
+    // Find the beginning of the next word.
+    WordStart = skipWhitespace(WordStart, Str, Length);
+    if (WordStart == Length)
+      break;
+
+    // Find the end of this word.
+    WordEnd = findEndOfWord(WordStart, Str, Length, Column, Columns);
+
+    // Does this word fit on the current line?
+    unsigned WordLength = WordEnd - WordStart;
+    if (Column + WordLength < Columns) {
+      // This word fits on the current line; print it there, separated from
+      // the previous word by a single space unless it opens the string.
+      if (WordStart) {
+        OS << ' ';
+        Column += 1;
+      }
+      applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength),
+                                TextNormal, Bold);
+      Column += WordLength;
+      continue;
+    }
+
+    // This word does not fit on the current line, so wrap to the next
+    // line. Let the stream produce the indentation: this needs no scratch
+    // buffer and is well-defined even when Indentation is zero.
+    OS << '\n';
+    OS.indent(Indentation);
+    applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength), TextNormal,
+                              Bold);
+    Column = Indentation + WordLength;
+    Wrapped = true;
+  }
+
+  // Append any remaining text from the message with its existing formatting.
+  applyTemplateHighlighting(OS, Str.substr(Length), TextNormal, Bold);
+
+  assert(TextNormal && "Text highlighted at end of diagnostic message.");
+
+  return Wrapped;
+}
+
+/// Build a SARIF diagnostic renderer.
+///
+/// \p LangOpts and \p DiagOpts are forwarded to the DiagnosticRenderer base;
+/// \p OS and \p Writer are stored in the members of the same name.
+SARIFDiagnostic::SARIFDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
+                                 DiagnosticOptions *DiagOpts,
+                                 SarifDocumentWriter *Writer)
+    : DiagnosticRenderer(LangOpts, DiagOpts),
+      OS(OS),
+      Writer(Writer) {
+}
+
+/// Emit one diagnostic line: optional location, optional level, then the
+/// message text followed by a newline.
+///
+/// The number of characters written for the location/level prefix is passed
+/// on as the starting column so that word-wrapping accounts for it. \p D is
+/// not used by this implementation.
+void SARIFDiagnostic::emitDiagnosticMessage(
+    FullSourceLoc Loc, PresumedLoc PLoc, DiagnosticsEngine::Level Level,
+    StringRef Message, ArrayRef<clang::CharSourceRange> Ranges,
+    DiagOrStoredDiag D) {
+  // Remember the stream position so the width of the prefix printed below
+  // can be measured afterwards.
+  const uint64_t PrefixBegin = OS.tell();
+
+  // Location of this particular diagnostic, when there is one.
+  if (Loc.isValid())
+    emitDiagnosticLoc(Loc, PLoc, Level, Ranges);
+
+  const bool UseColors = DiagOpts->ShowColors;
+  if (UseColors)
+    OS.resetColor();
+
+  if (DiagOpts->ShowLevel)
+    printDiagnosticLevel(OS, Level, UseColors);
+
+  const bool IsNote = Level == DiagnosticsEngine::Note;
+  const uint64_t PrefixWidth = OS.tell() - PrefixBegin;
+  printDiagnosticMessage(OS, /*IsSupplemental*/ IsNote, Message, PrefixWidth,
+                         DiagOpts->MessageLength, UseColors);
+}
+
+/// Print the textual label for \p Level ("note: ", "warning: ", ...).
+///
+/// When \p ShowColors is set the label is written in the bold colour
+/// associated with the level and the stream colour is reset afterwards.
+/// DiagnosticsEngine::Ignored is not a printable level.
+/*static*/ void SARIFDiagnostic::printDiagnosticLevel(
+    raw_ostream &OS, DiagnosticsEngine::Level Level, bool ShowColors) {
+  // For each level: switch to its colour first (if requested), then print
+  // the label.
+  switch (Level) {
+  case DiagnosticsEngine::Ignored:
+    llvm_unreachable("Invalid diagnostic type");
+  case DiagnosticsEngine::Note:
+    if (ShowColors)
+      OS.changeColor(noteColor, true);
+    OS << "note: ";
+    break;
+  case DiagnosticsEngine::Remark:
+    if (ShowColors)
+      OS.changeColor(remarkColor, true);
+    OS << "remark: ";
+    break;
+  case DiagnosticsEngine::Warning:
+    if (ShowColors)
+      OS.changeColor(warningColor, true);
+    OS << "warning: ";
+    break;
+  case DiagnosticsEngine::Error:
+    if (ShowColors)
+      OS.changeColor(errorColor, true);
+    OS << "error: ";
+    break;
+  case DiagnosticsEngine::Fatal:
+    if (ShowColors)
+      OS.changeColor(fatalColor, true);
+    OS << "fatal error: ";
+    break;
+  }
+
+  if (ShowColors)
+    OS.resetColor();
+}
+
+/// Print the message text of a diagnostic followed by a newline.
+///
+/// \param IsSupplemental true for continuation output such as notes; such
+/// messages are never printed in bold.
+/// \param CurrentColumn the column at which the message starts, used when
+/// word-wrapping.
+/// \param Columns the width to wrap to; zero disables word-wrapping.
+/// \param ShowColors whether colour/bold escape handling is enabled.
+/*static*/
+void SARIFDiagnostic::printDiagnosticMessage(
+    raw_ostream &OS, bool IsSupplemental, StringRef Message,
+    unsigned CurrentColumn, unsigned Columns, bool ShowColors) {
+  // Primary diagnostic messages are printed in bold and without colour, to
+  // visually mark the transition from continuation notes and other output.
+  const bool Bold = ShowColors && !IsSupplemental;
+  if (Bold)
+    OS.changeColor(savedColor, true);
+
+  if (Columns == 0) {
+    // No wrapping requested: emit the message in one go.
+    bool Normal = true;
+    applyTemplateHighlighting(OS, Message, Normal, Bold);
+    assert(Normal && "Formatting should have returned to normal");
+  } else {
+    printWordWrapped(OS, Message, Columns, CurrentColumn, Bold);
+  }
+
+  if (ShowColors)
+    OS.resetColor();
+  OS << '\n';
+}
+
+/// Write \p Filename to the output stream.
+///
+/// When the AbsolutePath diagnostic option is set and the file manager can
+/// find the file, a simplified absolute path is written instead of the name
+/// as given.
+void SARIFDiagnostic::emitFilename(StringRef Filename,
+                                   const SourceManager &SM) {
+#ifdef _WIN32
+  // Storage for the rewritten path; declared at function scope because
+  // Filename refers into it until the final write below.
+  SmallString<4096> TmpFilename;
+#endif
+  if (DiagOpts->AbsolutePath) {
+    if (auto File = SM.getFileManager().getFile(Filename)) {
+      // The goal is an absolute path with no "." or ".." components.
+      //
+      // The awkward case is "<part1>/<link>/../<part2>". On Unix-like
+      // systems "<link>/.." cannot simply be collapsed: paths are resolved
+      // component by component, so "<part1>/<part2>" may name a different
+      // location. FileManager::getCanonicalName() is therefore used there;
+      // it expands every indirection via llvm::sys::fs::real_path() and
+      // caches the result.
+      //
+      // Keeping as much of the original spelling as possible helps the user
+      // recognize the path, and real_path() expands all links, which is
+      // sometimes too much. On Windows both spellings refer to the same
+      // place, so llvm::sys::path::remove_dots() is sufficient.
+#ifdef _WIN32
+      TmpFilename = (*File)->getName();
+      llvm::sys::fs::make_absolute(TmpFilename);
+      llvm::sys::path::native(TmpFilename);
+      llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true);
+      Filename = TmpFilename.str();
+#else
+      Filename = SM.getFileManager().getCanonicalName(*File);
+#endif
+    }
+  }
+
+  OS << Filename;
+}
+
+/// Print out the file/line/column information and source ranges.
+///
+/// This method handles the emission of the diagnostic location information:
+/// it extracts as much location information as is present for the
+/// diagnostic and prints it in the configured diagnostic format, followed by
+/// any source ranges when those are enabled.
+///
+/// If \p PLoc is invalid only the file name (when available) is printed.
+/// If the ShowLocation option is off nothing is printed for a valid \p PLoc.
+void SARIFDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
+                                        DiagnosticsEngine::Level Level,
+                                        ArrayRef<CharSourceRange> Ranges) {
+  if (PLoc.isInvalid()) {
+    // No presumed location: fall back to the bare file name when the
+    // location still maps to a file entry.
+    if (Loc.getFileID().isValid()) {
+      if (const FileEntry *FE = Loc.getFileEntry()) {
+        emitFilename(FE->getName(), Loc.getManager());
+        OS << ": ";
+      }
+    }
+    return;
+  }
+
+  if (!DiagOpts->ShowLocation)
+    return;
+
+  const unsigned LineNo = PLoc.getLine();
+  const auto Format = DiagOpts->getFormat();
+  const bool IsMSVC = Format == DiagnosticOptions::MSVC;
+
+  if (DiagOpts->ShowColors)
+    OS.changeColor(savedColor, true);
+
+  emitFilename(PLoc.getFilename(), Loc.getManager());
+
+  // Line number, introduced by a format-specific separator.
+  switch (Format) {
+  case DiagnosticOptions::MSVC:
+    OS << '(' << LineNo;
+    break;
+  case DiagnosticOptions::Vi:
+    OS << " +" << LineNo;
+    break;
+  case DiagnosticOptions::SARIF:
+  case DiagnosticOptions::Clang:
+    if (DiagOpts->ShowLine)
+      OS << ':' << LineNo;
+    break;
+  }
+
+  // Column number; a column of zero is not printed.
+  if (DiagOpts->ShowColumn) {
+    if (unsigned ColNo = PLoc.getColumn()) {
+      if (!IsMSVC) {
+        OS << ':';
+      } else {
+        OS << ',';
+        // Visual Studio 2010 or earlier expects column number to be off by
+        // one.
+        if (LangOpts.MSCompatibilityVersion &&
+            !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2012))
+          --ColNo;
+      }
+      OS << ColNo;
+    }
+  }
+
+  // Close the location. Every format ends with ':'; MSVC first closes its
+  // parenthesis.
+  if (IsMSVC) {
+    // MSVC2013 and before print 'file(4) : error'. MSVC2015 gets rid of the
+    // space and prints 'file(4): error'.
+    OS << ')';
+    if (LangOpts.MSCompatibilityVersion &&
+        !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
+      OS << ' ';
+  }
+  OS << ':';
+
+  if (DiagOpts->ShowSourceRanges && !Ranges.empty()) {
+    const FileID CaretFileID = Loc.getExpansionLoc().getFileID();
+    const SourceManager &SM = Loc.getManager();
+    bool PrintedRange = false;
+
+    for (const CharSourceRange &Range : Ranges) {
+      // Ignore invalid ranges.
+      if (!Range.isValid())
+        continue;
+
+      const SourceLocation B = SM.getExpansionLoc(Range.getBegin());
+      const CharSourceRange ERange = SM.getExpansionRange(Range.getEnd());
+      const SourceLocation E = ERange.getEnd();
+
+      // Discard ranges whose start or end lies in a different file than the
+      // caret.
+      const FileID BeginFID = SM.getDecomposedLoc(B).first;
+      const FileID EndFID = SM.getDecomposedLoc(E).first;
+      if (BeginFID != CaretFileID || EndFID != CaretFileID)
+        continue;
+
+      // For token ranges, extend the end by the length of the last token so
+      // that multi-character tokens are covered.
+      const unsigned TokSize =
+          ERange.isTokenRange() ? Lexer::MeasureTokenLength(E, SM, LangOpts)
+                                : 0;
+
+      FullSourceLoc BF(B, SM), EF(E, SM);
+      OS << '{' << BF.getLineNumber() << ':' << BF.getColumnNumber() << '-'
+         << EF.getLineNumber() << ':' << (EF.getColumnNumber() + TokSize)
+         << '}';
+      PrintedRange = true;
+    }
+
+    if (PrintedRange)
+      OS << ':';
+  }
+  OS << ' ';
+}
+
+/// Print the "In file included from ..." line of an include stack.
+///
+/// The file name and line are only printed when locations are shown and
+/// \p PLoc is valid; otherwise a generic line is printed.
+void SARIFDiagnostic::emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) {
+  if (!DiagOpts->ShowLocation || PLoc.isInvalid()) {
+    OS << "In included file:\n";
+    return;
+  }
+
+  OS << "In file included from " << PLoc.getFilename() << ':'
+     << PLoc.getLine() << ":\n";
+}
+
+/// Print the "In module ... imported from ..." line of an import stack.
+///
+/// The importing file name and line are only printed when locations are
+/// shown and \p PLoc is valid; otherwise only the module name is printed.
+void SARIFDiagnostic::emitImportLocation(FullSourceLoc Loc, PresumedLoc PLoc,
+                                         StringRef ModuleName) {
+  if (!DiagOpts->ShowLocation || PLoc.isInvalid()) {
+    OS << "In module '" << ModuleName << "':\n";
+    return;
+  }
+
+  OS << "In module '" << ModuleName << "' imported from "
+     << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n";
+}
+
+/// Print the "While building module ..." line of a module build stack.
+///
+/// The importing file name and line are only printed when locations are
+/// shown and \p PLoc is valid; otherwise only the module name is printed.
+void SARIFDiagnostic::emitBuildingModuleLocation(FullSourceLoc Loc,
+                                                 PresumedLoc PLoc,
+                                                 StringRef ModuleName) {
+  if (!DiagOpts->ShowLocation || PLoc.isInvalid()) {
+    OS << "While building module '" << ModuleName << "':\n";
+    return;
+  }
+
+  OS << "While building module '" << ModuleName << "' imported from "
+     << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n";
+}
+
+/// Compute the pair of lines spanned by a source range.
+///
+/// \returns the expansion line numbers of the beginning and end of \p R, or
+/// None when \p R is invalid or either of its endpoints is not in \p FID.
+static llvm::Optional<std::pair<unsigned, unsigned>>
+findLinesForRange(const CharSourceRange &R, FileID FID,
+                  const SourceManager &SM) {
+  if (R.isValid()) {
+    const SourceLocation First = R.getBegin();
+    const SourceLocation Last = R.getEnd();
+
+    // Both endpoints have to live in the requested file.
+    if (SM.getFileID(First) == FID && SM.getFileID(Last) == FID)
+      return std::make_pair(SM.getExpansionLineNumber(First),
+                            SM.getExpansionLineNumber(Last));
+  }
+
+  return None;
+}
+
+/// Grow range \p A towards range \p B without letting the result span more
+/// than \p MaxRange entries. Both ranges are inclusive.
+///
+/// \returns \p A unchanged when it already has \p MaxRange entries or when
+/// \p B cannot be reached within \p MaxRange; the union of both ranges when
+/// that fits; otherwise \p A expanded towards \p B to exactly the allowed
+/// size where possible.
+static std::pair<unsigned, unsigned>
+maybeAddRange(std::pair<unsigned, unsigned> A, std::pair<unsigned, unsigned> B,
+              unsigned MaxRange) {
+  // Nothing can be added once A occupies the whole budget.
+  const unsigned InitialRoom = MaxRange - (A.second - A.first + 1);
+  if (InitialRoom == 0)
+    return A;
+
+  // Simplest outcome: the union of the two ranges is small enough.
+  const unsigned Lowest = std::min(A.first, B.first);
+  const unsigned Highest = std::max(A.second, B.second);
+  if (Highest - Lowest + 1 <= MaxRange)
+    return {Lowest, Highest};
+
+  // If B cannot be reached from A within the budget, leave A alone rather
+  // than adding lines that contain nothing interesting.
+  const bool StartUnreachable =
+      B.first > A.first && B.first - A.first + 1 > MaxRange;
+  const bool EndUnreachable =
+      B.second < A.second && A.second - B.second + 1 > MaxRange;
+  if (StartUnreachable || EndUnreachable)
+    return A;
+
+  // Otherwise widen A towards B up to the budget, aiming for an even split
+  // when B strictly contains A: first extend the end by up to half of the
+  // room, then pull the start back as far as the remaining room allows, and
+  // finally give whatever is left to the end.
+  A.second = std::min(A.second + (InitialRoom + 1) / 2, Highest);
+  const unsigned RemainingRoom = MaxRange - (A.second - A.first + 1);
+  A.first = std::max(Lowest + RemainingRoom, A.first) - RemainingRoom;
+  A.second = std::min(A.first + MaxRange - 1, Highest);
+  return A;
+}
+
+/// Mark the part of range \p R that falls on line \p LineNo with '~'
+/// characters in \p CaretLine.
+///
+/// Nothing is done when \p R is invalid, does not intersect \p LineNo, or
+/// has an endpoint outside \p FID. For token ranges the end is extended by
+/// the length of the final token and leading/trailing spaces and tabs are
+/// excluded from the highlight. \p CaretLine is grown with spaces if the
+/// highlighted columns extend past its current size.
+static void highlightRange(const CharSourceRange &R, unsigned LineNo,
+                           FileID FID, const SourceColumnMap &map,
+                           std::string &CaretLine, const SourceManager &SM,
+                           const LangOptions &LangOpts) {
+  if (!R.isValid())
+    return;
+
+  const SourceLocation Begin = R.getBegin();
+  const SourceLocation End = R.getEnd();
+
+  // The range has to start on or before this line ...
+  const unsigned StartLineNo = SM.getExpansionLineNumber(Begin);
+  if (StartLineNo > LineNo || SM.getFileID(Begin) != FID)
+    return; // No intersection.
+
+  // ... and end on or after it.
+  const unsigned EndLineNo = SM.getExpansionLineNumber(End);
+  if (EndLineNo < LineNo || SM.getFileID(End) != FID)
+    return; // No intersection.
+
+  const auto &SourceText = map.getSourceLine();
+  const size_t SourceSize = SourceText.size();
+
+  // Zero-based start: the beginning of the line unless the range starts on
+  // this very line.
+  unsigned StartColNo = 0;
+  if (StartLineNo == LineNo)
+    if (unsigned Col = SM.getExpansionColumnNumber(Begin))
+      StartColNo = Col - 1;
+
+  // Zero-based end: the end of the source line unless the range ends on this
+  // very line.
+  unsigned EndColNo = SourceSize;
+  if (EndLineNo == LineNo) {
+    if (unsigned Col = SM.getExpansionColumnNumber(End)) {
+      EndColNo = Col - 1;
+
+      // Token ranges point at the start of the last token; add its length so
+      // that multi-character tokens are covered.
+      if (R.isTokenRange())
+        EndColNo += Lexer::MeasureTokenLength(End, SM, LangOpts);
+    } else {
+      EndColNo = CaretLine.size();
+    }
+  }
+
+  assert(StartColNo <= EndColNo && "Invalid range!");
+
+  // A token range must not highlight only whitespace.
+  if (R.isTokenRange()) {
+    const auto IsBlank = [&SourceText](unsigned Byte) {
+      const char C = SourceText[Byte];
+      return C == ' ' || C == '\t';
+    };
+
+    // Move the start forward to the first non-blank column.
+    while (StartColNo < SourceSize && IsBlank(StartColNo))
+      StartColNo = map.startOfNextColumn(StartColNo);
+
+    // Clamp the end to the line and move it back past trailing blanks.
+    if (EndColNo > SourceSize)
+      EndColNo = SourceSize;
+    while (EndColNo && IsBlank(EndColNo - 1))
+      EndColNo = map.startOfPreviousColumn(EndColNo);
+
+    // If the two ends crossed, the range lies entirely in whitespace on this
+    // line. That most likely means a multi-line highlighting range covering
+    // a blank line.
+    if (StartColNo > EndColNo) {
+      assert(StartLineNo != EndLineNo && "trying to highlight whitespace");
+      StartColNo = EndColNo;
+    }
+  }
+
+  assert(StartColNo <= SourceSize && "Invalid range!");
+  assert(EndColNo <= SourceSize && "Invalid range!");
+
+  // Convert the byte offsets into display columns before touching the caret
+  // line.
+  StartColNo = map.byteToContainingColumn(StartColNo);
+  EndColNo = map.byteToContainingColumn(EndColNo);
+
+  assert(StartColNo <= EndColNo && "Invalid range!");
+  if (CaretLine.size() < EndColNo)
+    CaretLine.resize(EndColNo, ' ');
+
+  // Fill the range with ~'s.
+  const auto TildeBegin = CaretLine.begin() + StartColNo;
+  const auto TildeEnd = CaretLine.begin() + EndColNo;
+  std::fill(TildeBegin, TildeEnd, '~');
+}
+
+/// Build the line of fix-it insertion text shown beneath a source line.
+///
+/// Every hint in \p Hints that inserts code without a newline, and whose
+/// insertion point is on line \p LineNo of \p FID, has its text placed at
+/// the display column of that insertion point. Tabs in the result are
+/// expanded using the configured tab stop.
+///
+/// \returns the assembled line, or an empty string when there are no hints
+/// or fix-its are not shown.
+static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo,
+                                           const SourceColumnMap &map,
+                                           ArrayRef<FixItHint> Hints,
+                                           const SourceManager &SM,
+                                           const DiagnosticOptions *DiagOpts) {
+  std::string InsertionLine;
+  if (Hints.empty() || !DiagOpts->ShowFixits)
+    return InsertionLine;
+
+  // Display column just past the previously placed hint.
+  unsigned PrevHintEndCol = 0;
+
+  for (const FixItHint &Hint : Hints) {
+    // Only insertion hints contribute text.
+    if (Hint.CodeToInsert.empty())
+      continue;
+
+    // The inserted code must sit on the same line as the caret and must not
+    // contain newlines.
+    const std::pair<FileID, unsigned> HintLocInfo =
+        SM.getDecomposedExpansionLoc(Hint.RemoveRange.getBegin());
+    if (FID != HintLocInfo.first)
+      continue;
+    if (LineNo != SM.getLineNumber(HintLocInfo.first, HintLocInfo.second))
+      continue;
+    if (StringRef(Hint.CodeToInsert).find_first_of("\n\r") != StringRef::npos)
+      continue;
+
+    // Place the new code on the line just below the code the user wrote.
+    // Note: When modifying this function, be very careful about what is a
+    // "column" (printed width, platform-dependent) and what is a
+    // "byte offset" (SourceManager "column").
+    const unsigned HintByteOffset =
+        SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1;
+
+    // The hint must start inside the source or right at the end
+    assert(HintByteOffset < static_cast<unsigned>(map.bytes()) + 1);
+    unsigned HintCol = map.byteToContainingColumn(HintByteOffset);
+
+    // A long earlier hint may already occupy this column. In that case push
+    // this hint forwards and leave one space so it does not read as part of
+    // the previous completion; that is about the best that can be done when
+    // two hints appear to overlap.
+    //
+    // A hint located immediately after the previous one gets no extra
+    // space, since the location is more important.
+    if (HintCol < PrevHintEndCol)
+      HintCol = PrevHintEndCol + 1;
+
+    // This should NOT use HintByteOffset, because the source might have
+    // Unicode characters in earlier columns.
+    const unsigned GrownSize = InsertionLine.size() +
+                               (HintCol - PrevHintEndCol) +
+                               Hint.CodeToInsert.size();
+    if (GrownSize > InsertionLine.size())
+      InsertionLine.resize(GrownSize, ' ');
+
+    // The hint text occupies the tail of the (possibly grown) line.
+    std::copy(Hint.CodeToInsert.begin(), Hint.CodeToInsert.end(),
+              InsertionLine.end() - Hint.CodeToInsert.size());
+
+    PrevHintEndCol =
+        HintCol + llvm::sys::locale::columnWidth(Hint.CodeToInsert);
+  }
+
+  expandTabs(InsertionLine, DiagOpts->TabStop);
+
+  return InsertionLine;
+}
+
+/// Emit a code snippet and caret line.
+///
+/// This routine emits a single line's code snippet and caret line.
+///
+/// \param Loc The location for the caret.
+/// \param Ranges The underlined ranges for this code snippet.
+/// \param Hints The FixIt hints active for this diagnostic.
+void SARIFDiagnostic::emitSnippetAndCaret(
+    FullSourceLoc Loc, DiagnosticsEngine::Level Level,
+    SmallVectorImpl<CharSourceRange> &Ranges, ArrayRef<FixItHint> Hints) {
+  assert(Loc.isValid() && "must have a valid source location here");
+  assert(Loc.isFileID() && "must have a file location here");
+
+  // If caret diagnostics are enabled and we have location, we want to
+  // emit the caret.  However, we only do this if the location moved
+  // from the last diagnostic, if the last diagnostic was a note that
+  // was part of a different warning or error diagnostic, or if the
+  // diagnostic has ranges.  We don't want to emit the same caret
+  // multiple times if one loc has multiple diagnostics.
+  if (!DiagOpts->ShowCarets)
+    return;
+  if (Loc == LastLoc && Ranges.empty() && Hints.empty() &&
+      (LastLevel != DiagnosticsEngine::Note || Level == LastLevel))
+    return;
+
+  // Decompose the location into a FID/Offset pair.
+  std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
+  FileID FID = LocInfo.first;
+  const SourceManager &SM = Loc.getManager();
+
+  // Get information about the buffer it points into.
+  bool Invalid = false;
+  StringRef BufData = Loc.getBufferData(&Invalid);
+  if (Invalid)
+    return;
+
+  unsigned CaretLineNo = Loc.getLineNumber();
+  unsigned CaretColNo = Loc.getColumnNumber();
+
+  // Arbitrarily stop showing snippets when the line is too long.
+  static const size_t MaxLineLengthToPrint = 4096;
+  if (CaretColNo > MaxLineLengthToPrint)
+    return;
+
+  // Find the set of lines to include.
+  const unsigned MaxLines = DiagOpts->SnippetLineLimit;
+  std::pair<unsigned, unsigned> Lines = {CaretLineNo, CaretLineNo};
+  for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(),
+                                                  E = Ranges.end();
+       I != E; ++I)
+    if (auto OptionalRange = findLinesForRange(*I, FID, SM))
+      Lines = maybeAddRange(Lines, *OptionalRange, MaxLines);
+
+  for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1; ++LineNo) {
+    const char *BufStart = BufData.data();
+    const char *BufEnd = BufStart + BufData.size();
+
+    // Rewind from the current position to the start of the line.
+    const char *LineStart =
+        BufStart +
+        SM.getDecomposedLoc(SM.translateLineCol(FID, LineNo, 1)).second;
+    if (LineStart == BufEnd)
+      break;
+
+    // Compute the line end.
+    const char *LineEnd = LineStart;
+    while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd != BufEnd)
+      ++LineEnd;
+
+    // Arbitrarily stop showing snippets when the line is too long.
+    // FIXME: Don't print any lines in this case.
+    if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint)
+      return;
+
+    // Trim trailing null-bytes.
+    StringRef Line(LineStart, LineEnd - LineStart);
+    while (!Line.empty() && Line.back() == '\0' &&
+           (LineNo != CaretLineNo || Line.size() > CaretColNo))
+      Line = Line.drop_back();
+
+    // Copy the line of code into an std::string for ease of manipulation.
+    std::string SourceLine(Line.begin(), Line.end());
+
+    // Build the byte to column map.
+    const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop);
+
+    // Create a line for the caret that is filled with spaces that is the same
+    // number of columns as the line of source code.
+    std::string CaretLine(sourceColMap.columns(), ' ');
+
+    // Highlight all of the characters covered by Ranges with ~ characters.
+    for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(),
+                                                    E = Ranges.end();
+         I != E; ++I)
+      highlightRange(*I, LineNo, FID, sourceColMap, CaretLine, SM, LangOpts);
+
+    // Next, insert the caret itself.
+    if (CaretLineNo == LineNo) {
+      CaretColNo = sourceColMap.byteToContainingColumn(CaretColNo - 1);
+      if (CaretLine.size() < CaretColNo + 1)
+        CaretLine.resize(CaretColNo + 1, ' ');
+      CaretLine[CaretColNo] = '^';
+    }
+
+    std::string FixItInsertionLine = buildFixItInsertionLine(
+        FID, LineNo, sourceColMap, Hints, SM, DiagOpts.get());
+
+    // If the source line is too long for our terminal, select only the
+    // "interesting" source region within that line.
+    unsigned Columns = DiagOpts->MessageLength;
+    if (Columns)
+      selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine,
+                                    Columns, sourceColMap);
+
+    // If we are in -fdiagnostics-print-source-range-info mode, we are trying
+    // to produce easily machine parsable output.  Add a space before the
+    // source line and the caret to make it trivial to tell the main diagnostic
+    // line from what the user is intended to see.
+    if (DiagOpts->ShowSourceRanges) {
+      SourceLine = ' ' + SourceLine;
+      CaretLine = ' ' + CaretLine;
+    }
+
+    // Finally, remove any blank spaces from the end of CaretLine.
+    while (!CaretLine.empty() && CaretLine[CaretLine.size() - 1] == ' ')
+      CaretLine.erase(CaretLine.end() - 1);
+
+    // Emit what we have computed.
+    emitSnippet(SourceLine);
+
+    if (!CaretLine.empty()) {
+      if (DiagOpts->ShowColors)
+        OS.changeColor(caretColor, true);
+      OS << CaretLine << '\n';
+      if (DiagOpts->ShowColors)
+        OS.resetColor();
+    }
+
+    if (!FixItInsertionLine.empty()) {
+      if (DiagOpts->ShowColors)
+        // Print fixit line in color
+        OS.changeColor(fixitColor, false);
+      if (DiagOpts->ShowSourceRanges)
+        OS << ' ';
+      OS << FixItInsertionLine << '\n';
+      if (DiagOpts->ShowColors)
+        OS.resetColor();
+    }
+  }
+
+  // Print out any parseable fixit information requested by the options.
+  emitParseableFixits(Hints, SM);
+}
+
+/// Emit one source line to OS, then a newline. With colors enabled, runs of
+/// pieces reported as non-printable are shown in reverse video.
+void SARIFDiagnostic::emitSnippet(StringRef line) {
+  if (line.empty())
+    return;
+
+  const bool UseColors = DiagOpts->ShowColors;
+  std::string Pending;   // Text collected for the current run.
+  bool Reversed = false; // True while the current run is non-printable.
+  for (size_t Pos = 0; Pos < line.size();) {
+    std::pair<SmallString<16>, bool> Piece =
+        printableTextForNextCharacter(line, &Pos, DiagOpts->TabStop);
+    const bool Printable = Piece.second;
+    // The kind of run changes with this piece: flush what was collected.
+    if (UseColors && Printable == Reversed) {
+      if (Reversed)
+        OS.reverseColor();
+      OS << Pending;
+      Pending.clear();
+      OS.resetColor();
+    }
+
+    Reversed = !Printable;
+    Pending += Piece.first.str();
+  }
+
+  // Flush the last run; it is highlighted only if it is non-printable.
+  const bool Highlight = Reversed && UseColors;
+  if (Highlight)
+    OS.reverseColor();
+  OS << Pending;
+  if (Highlight)
+    OS.resetColor();
+  OS << '\n';
+}
+
+/// Print each fix-it hint on its own line in the machine-readable form
+///   fix-it:"<file>":{<line>:<col>-<line>:<col>}:"<code to insert>"
+/// Does nothing unless DiagOpts->ShowParseableFixits is set.
+void SARIFDiagnostic::emitParseableFixits(ArrayRef<FixItHint> Hints,
+                                          const SourceManager &SM) {
+  if (!DiagOpts->ShowParseableFixits)
+    return;
+
+  // Like FixItRewriter, fix-its in macros are not (yet) handled: emit nothing
+  // at all if any removal range is invalid or begins/ends at a macro location.
+  for (const FixItHint &Hint : Hints) {
+    const CharSourceRange &Range = Hint.RemoveRange;
+    if (Range.isInvalid() || Range.getBegin().isMacroID() ||
+        Range.getEnd().isMacroID())
+      return;
+  }
+
+  for (const FixItHint &Hint : Hints) {
+    const SourceLocation Begin = Hint.RemoveRange.getBegin();
+    const SourceLocation End = Hint.RemoveRange.getEnd();
+    const std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Begin);
+    std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(End);
+
+    // For token ranges, advance the end offset by the length of the end token.
+    if (Hint.RemoveRange.isTokenRange())
+      EndInfo.second += Lexer::MeasureTokenLength(End, SM, LangOpts);
+
+    // No word-wrapping or tab-expansion: the output must stay easy to parse.
+    const PresumedLoc Presumed = SM.getPresumedLoc(Begin);
+    if (Presumed.isInvalid())
+      break;
+
+    OS << "fix-it:\"";
+    OS.write_escaped(Presumed.getFilename());
+    OS << "\":{" << SM.getLineNumber(BeginInfo.first, BeginInfo.second) << ':'
+       << SM.getColumnNumber(BeginInfo.first, BeginInfo.second) << '-'
+       << SM.getLineNumber(EndInfo.first, EndInfo.second) << ':'
+       << SM.getColumnNumber(EndInfo.first, EndInfo.second) << "}:\"";
+    OS.write_escaped(Hint.CodeToInsert);
+    OS << "\"\n";
+  }
+}
Index: clang/lib/Frontend/FrontendAction.cpp
===================================================================
--- clang/lib/Frontend/FrontendAction.cpp
+++ clang/lib/Frontend/FrontendAction.cpp
@@ -11,6 +11,7 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclGroup.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/LangStandard.h"
 #include "clang/Frontend/ASTUnit.h"
 #include "clang/Frontend/CompilerInstance.h"
@@ -18,6 +19,7 @@
 #include "clang/Frontend/FrontendPluginRegistry.h"
 #include "clang/Frontend/LayoutOverrideSource.h"
 #include "clang/Frontend/MultiplexConsumer.h"
+#include "clang/Frontend/SARIFDiagnosticPrinter.h"
 #include "clang/Frontend/Utils.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/LiteralSupport.h"
@@ -717,8 +719,14 @@
       return false;
     }
   }
-  if (!CI.hasSourceManager())
+  if (!CI.hasSourceManager()) {
     CI.createSourceManager(CI.getFileManager());
+    if (CI.getDiagnosticOpts().getFormat() == DiagnosticOptions::SARIF) {
+      auto *Writer = new SarifDocumentWriter(CI.getSourceManager());
+      static_cast<SARIFDiagnosticPrinter *>(&CI.getDiagnosticClient())
+          ->setSarifWriter(Writer);
+    }
+  }
 
   // Set up embedding for any specified files. Do this before we load any
   // source files, including the primary module map for the compilation.
Index: clang/lib/Frontend/CompilerInstance.cpp
===================================================================
--- clang/lib/Frontend/CompilerInstance.cpp
+++ clang/lib/Frontend/CompilerInstance.cpp
@@ -12,6 +12,7 @@
 #include "clang/AST/Decl.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/LangStandard.h"
 #include "clang/Basic/SourceManager.h"
@@ -25,6 +26,7 @@
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/Frontend/FrontendPluginRegistry.h"
 #include "clang/Frontend/LogDiagnosticPrinter.h"
+#include "clang/Frontend/SARIFDiagnosticPrinter.h"
 #include "clang/Frontend/SerializedDiagnosticPrinter.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Frontend/Utils.h"
@@ -346,6 +348,8 @@
   // implementing -verify.
   if (Client) {
     Diags->setClient(Client, ShouldOwnClient);
+  } else if (Opts->getFormat() == DiagnosticOptions::SARIF) {
+    Diags->setClient(new SARIFDiagnosticPrinter(llvm::errs(), Opts));
   } else
     Diags->setClient(new TextDiagnosticPrinter(llvm::errs(), Opts));
 
Index: clang/lib/Frontend/CMakeLists.txt
===================================================================
--- clang/lib/Frontend/CMakeLists.txt
+++ clang/lib/Frontend/CMakeLists.txt
@@ -31,6 +31,8 @@
   MultiplexConsumer.cpp
   PrecompiledPreamble.cpp
   PrintPreprocessedOutput.cpp
+  SARIFDiagnostic.cpp
+  SARIFDiagnosticPrinter.cpp
   SerializedDiagnosticPrinter.cpp
   SerializedDiagnosticReader.cpp
   TestModuleFileExtension.cpp
Index: clang/include/clang/Frontend/SARIFDiagnosticPrinter.h
===================================================================
--- /dev/null
+++ clang/include/clang/Frontend/SARIFDiagnosticPrinter.h
@@ -0,0 +1,74 @@
+//===--- SARIFDiagnosticPrinter.h - SARIF Diagnostic Client -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a concrete diagnostic client, which emits diagnostics in SARIF
+// format to the output stream it is given.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FRONTEND_SARIFDIAGNOSTICPRINTER_H
+#define LLVM_CLANG_FRONTEND_SARIFDIAGNOSTICPRINTER_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/Sarif.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include <memory>
+
+namespace clang {
+class DiagnosticOptions;
+class LangOptions;
+class SARIFDiagnostic;
+class SarifDocumentWriter;
+
+/// DiagnosticConsumer installed when the diagnostic format is SARIF.
+class SARIFDiagnosticPrinter : public DiagnosticConsumer {
+  raw_ostream &OS;
+  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts;
+
+  /// Handle to the currently active SARIF diagnostic emitter.
+  std::unique_ptr<SARIFDiagnostic> SARIFDiag;
+
+  /// A string to prefix to error messages.
+  std::string Prefix;
+
+  SarifDocumentWriter *Writer = nullptr; // Null until setSarifWriter().
+
+  unsigned OwnsOutputStream : 1;
+
+public:
+  SARIFDiagnosticPrinter(raw_ostream &os, DiagnosticOptions *diags,
+                         bool OwnsOutputStream = false);
+  ~SARIFDiagnosticPrinter() override;
+
+  /// Set the diagnostic printer prefix string, which will be printed at the
+  /// start of any diagnostics. If empty, no prefix string is used.
+  void setPrefix(std::string Value) {
+    Prefix = std::move(Value);
+  } // TODO: In case we need this
+
+  bool hasSarifWriter() const { return Writer != nullptr; }
+
+  SarifDocumentWriter &getSarifWriter() const {
+    assert(Writer && "SarifWriter not set!");
+    return *Writer;
+  }
+
+  void setSarifWriter(SarifDocumentWriter *SarifWriter) {
+    Writer = SarifWriter;
+  } // NOTE(review): raw pointer stored; confirm who deletes the writer.
+
+  void BeginSourceFile(const LangOptions &LO, const Preprocessor *PP) override;
+  void EndSourceFile() override;
+  void HandleDiagnostic(DiagnosticsEngine::Level Level,
+                        const Diagnostic &Info) override;
+};
+
+} // end namespace clang
+
+#endif
Index: clang/include/clang/Frontend/SARIFDiagnostic.h
===================================================================
--- /dev/null
+++ clang/include/clang/Frontend/SARIFDiagnostic.h
@@ -0,0 +1,106 @@
+//===--- SARIFDiagnostic.h - SARIF Diagnostic Renderer ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class that renders diagnostics for Clang's SARIF output
+// mode. It plugs into the DiagnosticRenderer interface so that SARIF emission
+// shares the common diagnostic rendering code paths.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FRONTEND_SARIFDIAGNOSTIC_H
+#define LLVM_CLANG_FRONTEND_SARIFDIAGNOSTIC_H
+
+#include "clang/Basic/Sarif.h"
+#include "clang/Frontend/DiagnosticRenderer.h"
+#include "clang/Frontend/TextDiagnostic.h"
+
+namespace clang {
+
+/// DiagnosticRenderer subclass for SARIF diagnostics. Holds a reference to an
+/// output stream and a raw pointer to a SarifDocumentWriter.
+class SARIFDiagnostic : public DiagnosticRenderer {
+  raw_ostream &OS;
+
+  SarifDocumentWriter *Writer; // Raw pointer; ownership not shown here.
+
+public:
+  SARIFDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
+                  DiagnosticOptions *DiagOpts, SarifDocumentWriter *Writer);
+
+  ~SARIFDiagnostic() override = default;
+
+  /// Print the diagnostic level to a raw_ostream.
+  ///
+  /// This is a static helper that handles colorizing the level and formatting
+  /// it into an arbitrary output stream. This is used internally by the
+  /// SARIFDiagnostic emission code, but it can also be used directly by
+  /// consumers that don't have a source manager or other state that the full
+  /// SARIFDiagnostic logic requires.
+  static void printDiagnosticLevel(raw_ostream &OS,
+                                   DiagnosticsEngine::Level Level,
+                                   bool ShowColors);
+
+  /// Pretty-print a diagnostic message to a raw_ostream.
+  ///
+  /// This is a static helper to handle the line wrapping, colorizing, and
+  /// rendering of a diagnostic message to a particular ostream. It is
+  /// publicly visible so that clients which do not have sufficient state to
+  /// build a complete SARIFDiagnostic object can still get consistent
+  /// formatting of their diagnostic messages.
+  ///
+  /// \param OS Where the message is printed
+  /// \param IsSupplemental true if this is a continuation note diagnostic
+  /// \param Message The text actually printed
+  /// \param CurrentColumn The starting column of the first line, accounting
+  ///                      for any prefix.
+  /// \param Columns The number of columns to use in line-wrapping, 0 disables
+  ///                all line-wrapping.
+  /// \param ShowColors Enable colorizing of the message.
+  static void printDiagnosticMessage(raw_ostream &OS, bool IsSupplemental,
+                                     StringRef Message, unsigned CurrentColumn,
+                                     unsigned Columns, bool ShowColors);
+
+protected:
+  void emitDiagnosticMessage(FullSourceLoc Loc, PresumedLoc PLoc,
+                             DiagnosticsEngine::Level Level, StringRef Message,
+                             ArrayRef<CharSourceRange> Ranges,
+                             DiagOrStoredDiag D) override;
+
+  void emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
+                         DiagnosticsEngine::Level Level,
+                         ArrayRef<CharSourceRange> Ranges) override;
+
+  void emitCodeContext(FullSourceLoc Loc, DiagnosticsEngine::Level Level,
+                       SmallVectorImpl<CharSourceRange> &Ranges,
+                       ArrayRef<FixItHint> Hints) override {
+    emitSnippetAndCaret(Loc, Level, Ranges, Hints);
+  }
+
+  void emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) override;
+
+  void emitImportLocation(FullSourceLoc Loc, PresumedLoc PLoc,
+                          StringRef ModuleName) override;
+
+  void emitBuildingModuleLocation(FullSourceLoc Loc, PresumedLoc PLoc,
+                                  StringRef ModuleName) override;
+
+private:
+  void emitFilename(StringRef Filename, const SourceManager &SM);
+
+  void emitSnippetAndCaret(FullSourceLoc Loc, DiagnosticsEngine::Level Level,
+                           SmallVectorImpl<CharSourceRange> &Ranges,
+                           ArrayRef<FixItHint> Hints);
+
+  void emitSnippet(StringRef SourceLine);
+
+  void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM);
+};
+
+} // end namespace clang
+
+#endif

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D131632: [WIP] Enable SARIF Diagnostics

Reply via email to