[PATCH] D69327: [Clang][ThinLTO] Add a cache for compile phase output.

Yuanfang Chen via Phabricator via cfe-commits Wed, 23 Oct 2019 00:57:08 -0700

ychen created this revision.
ychen added reviewers: mehdi_amini, pcc, tejohnson.
Herald added subscribers: llvm-commits, cfe-commits, dang, dexonsmith, 
steven_wu, aheejin, hiraditya, inglorion.
Herald added projects: clang, LLVM.


Currently the link phase has an object file cache, whereas the compile phase
always
performs optimizations. This is most costly for large source files at -O2
or above,
and it can waste time optimizing a file that ultimately hits the
object file cache.
For example, with an Intel W-2133 and 64GB memory, compiling X86ISelLowering.cpp
with -flto=thin -O3
takes about 40s (about 10s with the caching implemented by this patch).
The patch makes sure bitcode files that hit the LTO cache also skip IR optimizations.

Add a driver/cc1 flag (-fthinlto-cache-dir, default off) to cache the minimized
or regular ThinLTO bitcode file.
Caching is only triggered if the input is larger than
`-fthinlto-cache-min-filesize=`. The default minimum is 1024 IR instructions.
Cache pruning (`-fthinlto-cache-policy=`) shares its implementation with `lld
--thinlto-cache-policy`.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69327

Files:
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/Basic/CodeGenOptions.h
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/BackendUtil.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/CodeGen/thin_link_bitcode.c
  llvm/include/llvm/LTO/Caching.h
  llvm/lib/LTO/Caching.cpp

Index: llvm/lib/LTO/Caching.cpp
===================================================================
--- llvm/lib/LTO/Caching.cpp
+++ llvm/lib/LTO/Caching.cpp
@@ -28,7 +28,8 @@
 using namespace llvm::lto;
 
 Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
-                                            AddBufferFn AddBuffer) {
+                                            AddBufferFn AddBuffer,
+                                            StringRef Prefix) {
   if (std::error_code EC = sys::fs::create_directories(CacheDirectoryPath))
     return errorCodeToError(EC);
 
@@ -36,7 +37,7 @@
     // This choice of file name allows the cache to be pruned (see pruneCache()
     // in include/llvm/Support/CachePruning.h).
     SmallString<64> EntryPath;
-    sys::path::append(EntryPath, CacheDirectoryPath, "llvmcache-" + Key);
+    sys::path::append(EntryPath, CacheDirectoryPath, Prefix + Key);
     // First, see if we have a cache hit.
     SmallString<64> ResultPath;
     Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
Index: llvm/include/llvm/LTO/Caching.h
===================================================================
--- llvm/include/llvm/LTO/Caching.h
+++ llvm/include/llvm/LTO/Caching.h
@@ -31,7 +31,8 @@
 /// file callback. This function also creates the cache directory if it does not
 /// already exist.
 Expected<NativeObjectCache> localCache(StringRef CacheDirectoryPath,
-                                       AddBufferFn AddBuffer);
+                                       AddBufferFn AddBuffer,
+                                       StringRef Prefix = "llvmcache-");
 
 } // namespace lto
 } // namespace llvm
Index: clang/test/CodeGen/thin_link_bitcode.c
===================================================================
--- clang/test/CodeGen/thin_link_bitcode.c
+++ clang/test/CodeGen/thin_link_bitcode.c
@@ -6,6 +6,17 @@
 // RUN: %clang_cc1 -o %t.newpm -flto=thin -fexperimental-new-pass-manager -fthin-link-bitcode=%t.newpm.nodebug -triple x86_64-unknown-linux-gnu -emit-llvm-bc -debug-info-kind=limited  %s
 // RUN: llvm-bcanalyzer -dump %t.newpm | FileCheck %s
 // RUN: llvm-bcanalyzer -dump %t.newpm.nodebug | FileCheck %s --check-prefix=NO_DEBUG
+
+// Test optimized bitcode files caching
+// RUN: rm -Rf %t.cache && mkdir %t.cache
+// RUN: %clang_cc1 -o %t -flto=thin -fthin-link-bitcode=%t.nodebug -fthinlto-cache-dir=%t.cache -fthinlto-cache-min-filesize=1 -triple x86_64-unknown-linux-gnu -emit-llvm-bc -debug-info-kind=limited %s
+// RUN: ls %t.cache | count 3
+// RUN: llvm-bcanalyzer -dump %t.cache/llvmcache-bc-* | FileCheck %s
+// RUN: llvm-bcanalyzer -dump %t.cache/llvmcache-thinlink-* | FileCheck %s --check-prefix=NO_DEBUG
+
+// RUN: rm -Rf %t.cache && mkdir %t.cache
+// RUN: %clang_cc1 -o %t -flto=thin -fthin-link-bitcode=%t.nodebug -fthinlto-cache-dir=%t.cache -fthinlto-cache-min-filesize=100 -triple x86_64-unknown-linux-gnu -emit-llvm-bc -debug-info-kind=limited %s
+// RUN: ls %t.cache | count 0
 int main (void) {
   return 0;
 }
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -991,6 +991,11 @@
 
   Opts.ThinLinkBitcodeFile = Args.getLastArgValue(OPT_fthin_link_bitcode_EQ);
 
+  Opts.ThinLTOCacheDir = Args.getLastArgValue(OPT_fthinlto_cache_dir_EQ);
+  Opts.ThinLTOCachePolicy = Args.getLastArgValue(OPT_fthinlto_cache_policy_EQ);
+  Opts.ThinLTOCacheMinFilesize= getLastArgUInt64Value(
+      Args, options::OPT_fthinlto_cache_min_filesize_EQ, 1024);
+
   Opts.MSVolatile = Args.hasArg(OPT_fms_volatile);
 
   Opts.VectorizeLoop = Args.hasArg(OPT_vectorize_loops);
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -3647,6 +3647,15 @@
     Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ);
   }
 
+  if (Args.getLastArg(options::OPT_fthinlto_cache_dir_EQ))
+    Args.AddLastArg(CmdArgs, options::OPT_fthinlto_cache_dir_EQ);
+
+  if (Args.getLastArg(options::OPT_fthinlto_cache_min_filesize_EQ))
+    Args.AddLastArg(CmdArgs, options::OPT_fthinlto_cache_min_filesize_EQ);
+
+  if (Args.getLastArg(options::OPT_fthinlto_cache_policy_EQ))
+    Args.AddLastArg(CmdArgs, options::OPT_fthinlto_cache_policy_EQ);
+
   if (Args.getLastArg(options::OPT_save_temps_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ);
 
Index: clang/lib/CodeGen/BackendUtil.cpp
===================================================================
--- clang/lib/CodeGen/BackendUtil.cpp
+++ clang/lib/CodeGen/BackendUtil.cpp
@@ -32,6 +32,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/IR/Verifier.h"
+#include "llvm/LTO/Caching.h"
 #include "llvm/LTO/LTOBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/SubtargetFeature.h"
@@ -39,13 +40,17 @@
 #include "llvm/Passes/PassPlugin.h"
 #include "llvm/Passes/StandardInstrumentations.h"
 #include "llvm/Support/BuryPointer.h"
+#include "llvm/Support/CachePruning.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/SHA1.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/VCSRevision.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Coroutines.h"
@@ -90,8 +95,6 @@
 
   Timer CodeGenerationTime;
 
-  std::unique_ptr<raw_pwrite_stream> OS;
-
   TargetIRAnalysis getTargetIRAnalysis() const {
     if (TM)
       return TM->getTargetIRAnalysis();
@@ -117,6 +120,11 @@
   bool AddEmitPasses(legacy::PassManager &CodeGenPasses, BackendAction Action,
                      raw_pwrite_stream &OS, raw_pwrite_stream *DwoOS);
 
+  using OptimizerFn =
+      std::function<void(Module &, raw_ostream &, raw_ostream *)>;
+  void runOptimizer(bool IsThinLTOCompilePhase, raw_ostream &OS,
+                    raw_ostream *ThinLinkOS, OptimizerFn OptimizeModule);
+
   std::unique_ptr<llvm::ToolOutputFile> openOutputFile(StringRef Path) {
     std::error_code EC;
     auto F = std::make_unique<llvm::ToolOutputFile>(Path, EC,
@@ -804,6 +812,252 @@
   return true;
 }
 
+/// Computes the ThinLTO compile-phase cache key for \p M: a hex-encoded SHA1,
+/// returned in \p Key, over the compiler version/revision, the module hash and
+/// a selection of CodeGenOptions, TargetOptions and LangOptions fields.
+/// FIXME: Very conservative (hashes a lot of options); relax if necessary.
+static void computeLTOCacheKey(SmallString<40> &Key, const Module &M,
+                     const CodeGenOptions &CGOpts,
+                     const clang::TargetOptions &TOpts,
+                     const LangOptions &LOpts) {
+  SHA1 Hasher;
+
+  // Start with the compiler revision
+  Hasher.update(LLVM_VERSION_STRING);
+  Hasher.update(LLVM_REVISION);
+
+  auto AddString = [&](StringRef S) {
+    Hasher.update(S);
+    Hasher.update(ArrayRef<uint8_t>{0}); // NUL delimits consecutive strings.
+  };
+  auto AddIntegral = [&](auto I) {
+    constexpr size_t NumBytes = sizeof(I);
+    uint8_t Data[NumBytes];
+    for (uint8_t &D : Data) { // Serialize least-significant byte first.
+      D = I;
+      I >>= 8;
+    }
+    Hasher.update(ArrayRef<uint8_t>{Data, NumBytes});
+  };
+  auto AddEnum = [&](auto E) {
+    using UnderTy = typename std::underlying_type<decltype(E)>::type;
+    AddIntegral(static_cast<UnderTy>(E));
+  };
+
+  // Include the module hash; the serialized bitcode itself (BC) is discarded.
+  ModuleHash ModHash = {{0}};
+  SmallString<0> BC;
+  raw_svector_ostream BCOS(BC);
+  WriteBitcodeToFile(M, BCOS, CGOpts.EmitLLVMUseLists, nullptr,
+                     /*GenerateHash=*/true, &ModHash);
+  Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+
+  // Hash CodeGenOptions.
+  {
+    // Caching-related options must not affect the key; blank them for now.
+    CodeGenOptions &CGOptsCopy = const_cast<CodeGenOptions&>(CGOpts);
+    SaveAndRestore<std::string> Save1(CGOptsCopy.ThinLTOCacheDir, "");
+    SaveAndRestore<std::string> Save2(CGOptsCopy.ThinLTOCachePolicy, "");
+    // FIXME: SaveAndRestore does not work for bitfields; restored by hand below.
+    const unsigned SavedThinLTOCacheMinFilesize =
+        CGOptsCopy.ThinLTOCacheMinFilesize;
+    CGOptsCopy.ThinLTOCacheMinFilesize = 0;
+
+    uint64_t HashInput = 0; // Accumulates bit-packed CODEGENOPT values.
+    unsigned NumOfBits = 0; // Number of bits of HashInput currently in use.
+  #define CODEGENOPT(Name, Bits, Default)                 \
+    if (Bits + NumOfBits > sizeof(HashInput) * 8) {       \
+      AddIntegral(HashInput);                             \
+      HashInput = 0;                                      \
+      NumOfBits = 0;                                      \
+    }                                                     \
+    HashInput |= static_cast<uint64_t>(CGOpts.Name) << NumOfBits; \
+    NumOfBits += Bits;
+  #define ENUM_CODEGENOPT(Name, Type, Bits, Default)      \
+    AddEnum(CGOpts.get##Name());
+  #include "clang/Basic/CodeGenOptions.def"
+    if (HashInput) // Flush the final, partially filled word.
+      AddIntegral(HashInput);
+
+    CGOptsCopy.ThinLTOCacheMinFilesize = SavedThinLTOCacheMinFilesize;
+  }
+
+  AddString(CGOpts.CodeModel);
+  AddString(CGOpts.CoverageDataFile);
+  AddString(CGOpts.CoverageNotesFile);
+  AddString(CGOpts.ProfileFilterFiles);
+  AddString(CGOpts.ProfileExcludeFiles);
+  Hasher.update(ArrayRef<uint8_t>((const uint8_t *)CGOpts.CoverageVersion, 4));
+  AddString(CGOpts.DebugPass);
+  AddString(CGOpts.DebugCompilationDir);
+  AddString(CGOpts.DwarfDebugFlags);
+  AddString(CGOpts.RecordCommandLine);
+  AddString(CGOpts.FloatABI);
+  AddString(CGOpts.FPDenormalMode);
+  AddString(CGOpts.LimitFloatPrecision);
+  AddString(CGOpts.MainFileName);
+  AddEnum(CGOpts.RelocationModel);
+  AddString(CGOpts.ThreadModel);
+  AddString(CGOpts.TrapFuncName);
+  for (const auto &I : CGOpts.DependentLibraries)
+    AddString(I);
+  for (const auto &I : CGOpts.LinkerOptions)
+    AddString(I);
+  AddString(CGOpts.InstrProfileOutput);
+  AddString(CGOpts.SampleProfileFile);
+  AddString(CGOpts.ProfileInstrumentUsePath);
+  AddString(CGOpts.ProfileRemappingFile);
+  AddString(CGOpts.ThinLinkBitcodeFile);
+  AddString(CGOpts.SaveTempsFilePrefix);
+  AddString(CGOpts.CudaGpuBinaryFileName);
+  AddString(CGOpts.OptRecordFile);
+  AddString(CGOpts.OptRecordPasses);
+  AddString(CGOpts.OptRecordFormat);
+  AddString(CGOpts.SymbolPartition);
+  for (const auto &I : CGOpts.RewriteMapFiles)
+    AddString(I);
+  AddIntegral(static_cast<size_t>(CGOpts.SanitizeRecover.Mask.hash_value()));
+  AddIntegral(static_cast<size_t>(CGOpts.SanitizeTrap.Mask.hash_value()));
+  Hasher.update(ArrayRef<uint8_t>(CGOpts.CmdArgs));
+  for (const auto &I : CGOpts.NoBuiltinFuncs)
+    AddString(I);
+  for (const auto &I : CGOpts.Reciprocals)
+    AddString(I);
+  AddString(CGOpts.PreferVectorWidth);
+  AddIntegral(CGOpts.XRayInstrumentationBundle.Mask);
+  for (const auto &I : CGOpts.DefaultFunctionAttrs)
+    AddString(I);
+  for (const auto &I : CGOpts.PassPlugins)
+    AddString(I);
+
+  // Hash clang::TargetOptions
+  AddString(TOpts.Triple);
+  AddString(TOpts.HostTriple);
+  AddString(TOpts.CPU);
+  AddString(TOpts.FPMath);
+  AddString(TOpts.ABI);
+  AddEnum(TOpts.EABIVersion);
+  AddString(TOpts.LinkerVersion);
+  for (const auto &I : TOpts.FeaturesAsWritten)
+    AddString(I);
+  for (const auto &I : TOpts.Features)
+    AddString(I);
+  AddString(TOpts.CodeModel);
+  AddString(TOpts.SDKVersion.getAsString());
+
+  // Hash LangOptions
+  for (const auto &I : LOpts.SanitizerBlacklistFiles)
+    AddString(I);
+  AddEnum(LOpts.getDefaultFPContractMode());
+  AddIntegral(static_cast<uint8_t>(LOpts.SjLjExceptions));
+  AddIntegral(static_cast<uint8_t>(LOpts.SEHExceptions));
+  AddIntegral(static_cast<uint8_t>(LOpts.DWARFExceptions));
+  AddIntegral(static_cast<uint8_t>(LOpts.WasmExceptions));
+  AddIntegral(static_cast<size_t>(LOpts.Sanitize.Mask.hash_value()));
+  AddIntegral(static_cast<uint8_t>(LOpts.ObjCAutoRefCount));
+  AddIntegral(static_cast<uint8_t>(LOpts.Coroutines));
+
+  Key = toHex(Hasher.result());
+}
+
+/// Runs \p OptimizeModule, or for an eligible ThinLTO compile serves \p OS (and
+/// \p ThinLinkOS, if set) from the compile-phase cache, filling it on a miss.
+void EmitAssemblyHelper::runOptimizer(bool IsThinLTOCompilePhase,
+                                      raw_ostream &OS, raw_ostream *ThinLinkOS,
+                                      OptimizerFn OptimizeModule) {
+  auto ModuleIsEligibleForCaching = [&](const Module &M) {
+    unsigned ModuleInstCount = 0;
+    for (const auto &F : M)
+      for (const auto &BB : F) {
+        ModuleInstCount += BB.size();
+        // FIXME: Tune default file size limit.
+        if (ModuleInstCount > CodeGenOpts.ThinLTOCacheMinFilesize)
+          return true;
+      }
+    return false;
+  };
+
+  const bool UseThinLTOCache = IsThinLTOCompilePhase &&
+                               !CodeGenOpts.ThinLTOCacheDir.empty() &&
+                               ModuleIsEligibleForCaching(*TheModule);
+  if (!UseThinLTOCache) {
+    OptimizeModule(*TheModule, OS, ThinLinkOS);
+    return;
+  }
+
+  SmallString<40> Key;
+  computeLTOCacheKey(Key, *TheModule, CodeGenOpts, TargetOpts, LangOpts);
+  unsigned CacheDiagID =
+      Diags.getCustomDiagID(DiagnosticsEngine::Error,
+                            "ThinLTO cache error for '%0': '%1'");
+
+  bool ThinLTOCacheHit = false; // True once every output came from the cache.
+  auto ThinLinkOSHook = [&](size_t, std::unique_ptr<MemoryBuffer> mb) {
+    *ThinLinkOS << mb->getBuffer();
+    ThinLTOCacheHit = true;
+  };
+
+  auto OSHook = [&](size_t, std::unique_ptr<MemoryBuffer> mb) {
+    if (!ThinLinkOS)
+      ThinLTOCacheHit = true; // The bitcode entry alone is a full hit.
+    else if (Expected<lto::NativeObjectCache> Cache = lto::localCache(
+            CodeGenOpts.ThinLTOCacheDir, ThinLinkOSHook, "llvmcache-thinlink-"))
+      (void)(*Cache)(/*Task=*/-1, Key);
+    else
+      Diags.Report(CacheDiagID)
+          << CodeGenOpts.ThinLTOCacheDir << Cache.takeError();
+    // Emit the cached bitcode only on a full hit: on a thin-link miss the
+    // optimizer below regenerates OS, so writing here would duplicate it.
+    if (ThinLTOCacheHit)
+      OS << mb->getBuffer();
+  };
+
+  if (Expected<lto::NativeObjectCache> BitcodeCache =
+          lto::localCache(CodeGenOpts.ThinLTOCacheDir, OSHook, "llvmcache-bc-"))
+    (void)(*BitcodeCache)(/*Task=*/-1, Key);
+  else
+    Diags.Report(CacheDiagID)
+        << CodeGenOpts.ThinLTOCacheDir << BitcodeCache.takeError();
+
+  if (ThinLTOCacheHit)
+    return;
+
+  // Cache miss: run the optimizer, capturing the bitcode in CacheOS for reuse.
+  buffer_ostream CacheOS(OS);
+  OptimizeModule(*TheModule, CacheOS, ThinLinkOS);
+
+  if (Expected<CachePruningPolicy> ThinLTOCachePolicy =
+      parseCachePruningPolicy(CodeGenOpts.ThinLTOCachePolicy))
+    pruneCache(CodeGenOpts.ThinLTOCacheDir, *ThinLTOCachePolicy);
+  else
+    Diags.Report(CacheDiagID)
+        << CodeGenOpts.ThinLTOCachePolicy << ThinLTOCachePolicy.takeError();
+
+  // Save the bitcode just produced to the cache under the "llvmcache-bc-" key.
+  SmallString<64> EntryPath;
+  sys::path::append(EntryPath, CodeGenOpts.ThinLTOCacheDir,
+                    "llvmcache-bc-" + Key);
+  std::unique_ptr<llvm::ToolOutputFile> CopyOS = openOutputFile(EntryPath);
+  if (!CopyOS)
+    return;
+  CopyOS->os() << CacheOS.str();
+  CopyOS->keep();
+
+  // Save the thin-link bitcode too, copied from its flushed output file.
+  if (ThinLinkOS) {
+    EntryPath.clear();
+    sys::path::append(EntryPath, CodeGenOpts.ThinLTOCacheDir,
+                      "llvmcache-thinlink-" + Key);
+    ThinLinkOS->flush();
+    if (std::error_code EC =
+            sys::fs::copy_file(CodeGenOpts.ThinLinkBitcodeFile, EntryPath)) {
+      unsigned DiagID = Diags.getCustomDiagID(
+          DiagnosticsEngine::Warning, "ThinLTO saving to cache fail: '%0'");
+      Diags.Report(DiagID) << EC.message();
+    }
+  }
+}
+
 void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
                                       std::unique_ptr<raw_pwrite_stream> OS) {
   TimeRegion Region(FrontendTimesIsEnabled ? &CodeGenerationTime : nullptr);
@@ -834,6 +1088,7 @@
   CodeGenPasses.add(
       createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
 
+  bool IsThinLTOCompilePhase = false;
   std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS;
 
   switch (Action) {
@@ -849,8 +1104,7 @@
       }
       TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
                                CodeGenOpts.EnableSplitLTOUnit);
-      PerModulePasses.add(createWriteThinLTOBitcodePass(
-          *OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr));
+      IsThinLTOCompilePhase = true;
     } else {
       // Emit a module summary by default for Regular LTO except for ld64
       // targets
@@ -890,25 +1144,31 @@
   // Before executing passes, print the final values of the LLVM options.
   cl::PrintOptionValues();
 
-  // Run passes. For now we do all passes at once, but eventually we
-  // would like to have the option of streaming code generation.
-
-  {
-    PrettyStackTraceString CrashInfo("Per-function optimization");
-    llvm::TimeTraceScope TimeScope("PerFunctionPasses", StringRef(""));
-
-    PerFunctionPasses.doInitialization();
-    for (Function &F : *TheModule)
-      if (!F.isDeclaration())
-        PerFunctionPasses.run(F);
-    PerFunctionPasses.doFinalization();
-  }
+  // Now that we have all of the passes ready, run them.
+  OptimizerFn OptFn = [&](Module &M, raw_ostream &OS, raw_ostream *ThinLinkOS) {
+    // Run passes. For now we do all passes at once, but eventually we
+    // would like to have the option of streaming code generation.
+    {
+      PrettyStackTraceString CrashInfo("Per-function optimization");
+      llvm::TimeTraceScope TimeScope("PerFunctionPasses", StringRef(""));
+
+      PerFunctionPasses.doInitialization();
+      for (Function &F : *TheModule)
+        if (!F.isDeclaration())
+          PerFunctionPasses.run(F);
+      PerFunctionPasses.doFinalization();
+    }
 
-  {
-    PrettyStackTraceString CrashInfo("Per-module optimization passes");
-    llvm::TimeTraceScope TimeScope("PerModulePasses", StringRef(""));
-    PerModulePasses.run(*TheModule);
-  }
+    {
+      PrettyStackTraceString CrashInfo("Per-module optimization passes");
+      llvm::TimeTraceScope TimeScope("PerModulePasses", StringRef(""));
+      if (IsThinLTOCompilePhase)
+        PerModulePasses.add(createWriteThinLTOBitcodePass(OS, ThinLinkOS));
+      PerModulePasses.run(M);
+    }
+  };
+  raw_ostream *TheThinLinkOS = ThinLinkOS ? &ThinLinkOS->os() : nullptr;
+  runOptimizer(IsThinLTOCompilePhase, *OS, TheThinLinkOS, OptFn);
 
   {
     PrettyStackTraceString CrashInfo("Code generation");
@@ -1256,6 +1516,7 @@
   // create that pass manager here and use it as needed below.
   legacy::PassManager CodeGenPasses;
   bool NeedCodeGen = false;
+  bool IsThinLTOCompilePhase = false;
   std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS;
 
   // Append any output we need to the pass manager.
@@ -1272,8 +1533,7 @@
       }
       TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
                                CodeGenOpts.EnableSplitLTOUnit);
-      MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os()
-                                                           : nullptr));
+      IsThinLTOCompilePhase = true;
     } else {
       // Emit a module summary by default for Regular LTO except for ld64
       // targets
@@ -1319,10 +1579,14 @@
   cl::PrintOptionValues();
 
   // Now that we have all of the passes ready, run them.
-  {
+  OptimizerFn OptFn = [&](Module &M, raw_ostream &OS, raw_ostream *ThinLinkOS) {
     PrettyStackTraceString CrashInfo("Optimizer");
-    MPM.run(*TheModule, MAM);
-  }
+    if (IsThinLTOCompilePhase)
+      MPM.addPass(ThinLTOBitcodeWriterPass(OS, ThinLinkOS));
+    MPM.run(M, MAM);
+  };
+  raw_ostream *TheThinLinkOS = ThinLinkOS ? &ThinLinkOS->os() : nullptr;
+  runOptimizer(IsThinLTOCompilePhase, *OS, TheThinLinkOS, OptFn);
 
   // Now if needed, run the legacy PM for codegen.
   if (NeedCodeGen) {
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1309,6 +1309,15 @@
 def fthinlto_index_EQ : Joined<["-"], "fthinlto-index=">,
   Flags<[CoreOption, CC1Option]>, Group<f_Group>,
   HelpText<"Perform ThinLTO importing using provided function summary index">;
+def fthinlto_cache_dir_EQ : Joined<["-"], "fthinlto-cache-dir=">,
+  Flags<[CoreOption, CC1Option]>, Group<f_Group>, MetaVarName<"<directory>">,
+  HelpText<"Specify cache directory for ThinLTO compile phase">;
+def fthinlto_cache_min_filesize_EQ : Joined<["-"], "fthinlto-cache-min-filesize=">,
+  Flags<[CoreOption, CC1Option]>, Group<f_Group>, MetaVarName<"<Num of IR instructions> (default 1024)">,
+  HelpText<"Files with size that are below this limit skip ThinLTO compile phase caching">;
+def fthinlto_cache_policy_EQ : Joined<["-"], "fthinlto-cache-policy=">,
+  Flags<[CoreOption, CC1Option]>, Group<f_Group>,
+  HelpText<"Pruning policy for the ThinLTO compile phase cache">;
 def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
                                 Group<f_Group>, Flags<[DriverOption, CoreOption]>;
 def fmerge_all_constants : Flag<["-"], "fmerge-all-constants">, Group<f_Group>,
Index: clang/include/clang/Basic/CodeGenOptions.h
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.h
+++ clang/include/clang/Basic/CodeGenOptions.h
@@ -235,6 +235,12 @@
   /// the summary and module symbol table (and not, e.g. any debug metadata).
   std::string ThinLinkBitcodeFile;
 
+  /// The directory ThinLTO caches compile phase outputs.
+  std::string ThinLTOCacheDir;
+
+  /// Pruning policy for ThinLTO compile phase cache.
+  std::string ThinLTOCachePolicy;
+
   /// Prefix to use for -save-temps output.
   std::string SaveTempsFilePrefix;
 
Index: clang/include/clang/Basic/CodeGenOptions.def
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.def
+++ clang/include/clang/Basic/CodeGenOptions.def
@@ -121,6 +121,9 @@
 				     /// CFI and traditional whole program
 				     /// devirtualization that require whole
 				     /// program IR support.
+CODEGENOPT(ThinLTOCacheMinFilesize, 32, 1024)
+                          ///< Minimal number of instructions of the Module to
+                          ///< be considered for ThinLTO compile phase caching.
 CODEGENOPT(IncrementalLinkerCompatible, 1, 0) ///< Emit an object file which can
                                               ///< be used with an incremental
                                               ///< linker.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D69327: [Clang][ThinLTO] Add a cache for compile phase output.

Reply via email to