tejohnson created this revision.
tejohnson added reviewers: snehasish, davidxl.
Herald added subscribers: wlei, Enna1, ormris, wenlei, steven_wu, hiraditya.
Herald added a project: All.
tejohnson requested review of this revision.
Herald added a subscriber: MaskRay.
Herald added projects: clang, LLVM.

Previously the MemProf profile was expected to be in the same profile
file as a normal PGO profile, passed via the usual -fprofile-use=
option, and was matched in the same pass. Separate out the MemProf
feedback option and matching pass, for two reasons. First, this
simplifies profile preparation: the raw MemProf profile requires the
binary for symbolization, so it may be simpler to index separately from
the raw PGO profile. Second, it enables providing a MemProf profile for
a SamplePGO build.

This patch adds the -fmemory-profile-use=${file} option. The provided
file is passed down to LLVM and ultimately consumed by a new
MemProfUsePass, which matches only the memory profile contents of that
file.

Note that a single profile file containing both normal PGO and MemProf
profile data is still supported, and the relevant profile data is
matched by the appropriate matching pass(es) based on which option(s)
the profile is provided with (the same profile file can be supplied to
both feedback options).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D154856

Files:
  clang/include/clang/Basic/CodeGenOptions.h
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/BackendUtil.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGen/memprof.cpp
  clang/test/Driver/fmemprof.cpp
  llvm/include/llvm/Support/PGOOptions.h
  llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Passes/PassBuilderPipelines.cpp
  llvm/lib/Passes/PassRegistry.def
  llvm/lib/Support/PGOOptions.cpp
  llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
  llvm/test/Transforms/PGOProfile/memprof.ll
  llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll
  llvm/tools/opt/NewPMDriver.cpp

Index: llvm/tools/opt/NewPMDriver.cpp
===================================================================
--- llvm/tools/opt/NewPMDriver.cpp
+++ llvm/tools/opt/NewPMDriver.cpp
@@ -176,6 +176,9 @@
                                       "Use sampled profile to guide PGO.")));
 static cl::opt<std::string> ProfileFile("profile-file",
                                  cl::desc("Path to the profile."), cl::Hidden);
+static cl::opt<std::string>
+    MemoryProfileFile("memory-profile-file",
+                      cl::desc("Path to the memory profile."), cl::Hidden);
 
 static cl::opt<CSPGOKind> CSPGOKindFlag(
     "cspgo-kind", cl::init(NoCSPGO), cl::Hidden,
@@ -336,19 +339,21 @@
   std::optional<PGOOptions> P;
   switch (PGOKindFlag) {
   case InstrGen:
-    P = PGOOptions(ProfileFile, "", "", FS, PGOOptions::IRInstr);
+    P = PGOOptions(ProfileFile, "", "", MemoryProfileFile, FS,
+                   PGOOptions::IRInstr);
     break;
   case InstrUse:
-    P = PGOOptions(ProfileFile, "", ProfileRemappingFile, FS,
+    P = PGOOptions(ProfileFile, "", ProfileRemappingFile, MemoryProfileFile, FS,
                    PGOOptions::IRUse);
     break;
   case SampleUse:
-    P = PGOOptions(ProfileFile, "", ProfileRemappingFile, FS,
+    P = PGOOptions(ProfileFile, "", ProfileRemappingFile, MemoryProfileFile, FS,
                    PGOOptions::SampleUse);
     break;
   case NoPGO:
-    if (DebugInfoForProfiling || PseudoProbeForProfiling)
-      P = PGOOptions("", "", "", nullptr, PGOOptions::NoAction,
+    if (DebugInfoForProfiling || PseudoProbeForProfiling ||
+        !MemoryProfileFile.empty())
+      P = PGOOptions("", "", "", MemoryProfileFile, FS, PGOOptions::NoAction,
                      PGOOptions::NoCSAction, DebugInfoForProfiling,
                      PseudoProbeForProfiling);
     else
@@ -369,8 +374,9 @@
         P->CSAction = PGOOptions::CSIRInstr;
         P->CSProfileGenFile = CSProfileGenFile;
       } else
-        P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, FS,
-                       PGOOptions::NoAction, PGOOptions::CSIRInstr);
+        P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile,
+                       /*MemoryProfile=*/"", FS, PGOOptions::NoAction,
+                       PGOOptions::CSIRInstr);
     } else /* CSPGOKindFlag == CSInstrUse */ {
       if (!P) {
         errs() << "CSInstrUse needs to be together with InstrUse";
Index: llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll
===================================================================
--- llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll
+++ llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll
@@ -11,7 +11,7 @@
 
 ; RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata
 
-; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s
 
 ; CHECK: memprof record not found for function hash {{.*}} _Z16funcnotinprofilev
 
Index: llvm/test/Transforms/PGOProfile/memprof.ll
===================================================================
--- llvm/test/Transforms/PGOProfile/memprof.ll
+++ llvm/test/Transforms/PGOProfile/memprof.ll
@@ -23,19 +23,36 @@
 ; ALL-NOT: memprof record not found for function hash
 ; ALL-NOT: no profile data available for function
 
-;; Feed back memprof-only profile
-; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
+;; Using a memprof-only profile for memprof-use should only give memprof metadata
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
 ; There should not be any PGO metadata
 ; MEMPROFONLY-NOT: !prof
 
-;; Feed back pgo-only profile
-; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgoprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=PGO,ALL,PGOONLY
+;; Test the same thing but by passing the memory profile through to a default
+;; pipeline via -memory-profile-file=, which should cause the necessary field
+;; of the PGOOptions structure to be populated with the profile filename.
+; RUN: opt < %s -passes='default<O2>' -memory-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
+
+;; Using a pgo+memprof profile for memprof-use should only give memprof metadata
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
+
+;; Using a pgo-only profile for memprof-use should give an error
+; RUN: not opt < %s -passes='memprof-use<profile-filename=%t.pgoprofdata>' -S 2>&1 | FileCheck %s --check-prefixes=MEMPROFWITHPGOONLY
+; MEMPROFWITHPGOONLY: Not a memory profile
+
+;; Using a memprof-only profile for pgo-instr-use should give an error
+; RUN: not opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -S 2>&1 | FileCheck %s --check-prefixes=PGOWITHMEMPROFONLY
+; PGOWITHMEMPROFONLY: Not an IR level instrumentation profile
+
+;; Using a pgo+memprof profile for pgo-instr-use should only give pgo metadata
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=PGO,ALL,PGOONLY
 ; There should not be any memprof related metadata
 ; PGOONLY-NOT: !memprof
 ; PGOONLY-NOT: !callsite
 
-;; Feed back pgo+memprof-only profile
-; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,PGO,ALL
+;; Using a pgo+memprof profile for both memprof-use and pgo-instr-use should
+;; give both memprof and pgo metadata.
+; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO
 
 ; ModuleID = 'memprof.cc'
 source_filename = "memprof.cc"
Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
===================================================================
--- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -62,8 +62,6 @@
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/MemoryProfileInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -97,7 +95,6 @@
 #include "llvm/IR/Value.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/InstrProfReader.h"
-#include "llvm/Support/BLAKE3.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CRC.h"
 #include "llvm/Support/Casting.h"
@@ -107,7 +104,6 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/HashBuilder.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/Triple.h"
@@ -120,18 +116,15 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
-#include <map>
 #include <memory>
 #include <numeric>
 #include <optional>
-#include <set>
 #include <string>
 #include <unordered_map>
 #include <utility>
 #include <vector>
 
 using namespace llvm;
-using namespace llvm::memprof;
 using ProfileCount = Function::ProfileCount;
 using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
 
@@ -146,7 +139,6 @@
 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
-STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
 STATISTIC(NumOfCSPGOSelectInsts,
@@ -203,31 +195,31 @@
     cl::desc("Append function hash to the name of COMDAT function to avoid "
              "function hash mismatch due to the preinliner"));
 
+namespace llvm {
 // Command line option to enable/disable the warning about missing profile
 // information.
-static cl::opt<bool>
-    PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
-                   cl::desc("Use this option to turn on/off "
-                            "warnings about missing profile data for "
-                            "functions."));
+cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
+                             cl::Hidden,
+                             cl::desc("Use this option to turn on/off "
+                                      "warnings about missing profile data for "
+                                      "functions."));
 
-namespace llvm {
 // Command line option to enable/disable the warning about a hash mismatch in
 // the profile data.
 cl::opt<bool>
     NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
                       cl::desc("Use this option to turn off/on "
                                "warnings about profile cfg mismatch."));
-} // namespace llvm
 
 // Command line option to enable/disable the warning about a hash mismatch in
 // the profile data for Comdat functions, which often turns out to be false
 // positive due to the pre-instrumentation inline.
-static cl::opt<bool> NoPGOWarnMismatchComdatWeak(
+cl::opt<bool> NoPGOWarnMismatchComdatWeak(
     "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
     cl::desc("The option is used to turn on/off "
              "warnings about hash mismatch for comdat "
              "or weak functions."));
+} // namespace llvm
 
 // Command line option to enable/disable select instruction instrumentation.
 static cl::opt<bool>
@@ -321,10 +313,6 @@
     "pgo-function-size-threshold", cl::Hidden,
     cl::desc("Do not instrument functions smaller than this threshold."));
 
-static cl::opt<bool> MatchMemProf(
-    "pgo-match-memprof", cl::init(true), cl::Hidden,
-    cl::desc("Perform matching and annotation of memprof profiles."));
-
 static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
     "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
     cl::desc("Do not instrument functions with the number of critical edges "
@@ -1081,9 +1069,6 @@
   bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
                     InstrProfRecord::CountPseudoKind &PseudoKind);
 
-  // Read memprof data for the instrumented function from profile.
-  bool readMemprof(IndexedInstrProfReader *PGOReader);
-
   // Populate the counts for all BBs.
   void populateCounters();
 
@@ -1303,257 +1288,6 @@
   F.setMetadata(LLVMContext::MD_annotation, MD);
 }
 
-static void addCallsiteMetadata(Instruction &I,
-                                std::vector<uint64_t> &InlinedCallStack,
-                                LLVMContext &Ctx) {
-  I.setMetadata(LLVMContext::MD_callsite,
-                buildCallstackMetadata(InlinedCallStack, Ctx));
-}
-
-static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
-                               uint32_t Column) {
-  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
-      HashBuilder;
-  HashBuilder.add(Function, LineOffset, Column);
-  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
-  uint64_t Id;
-  std::memcpy(&Id, Hash.data(), sizeof(Hash));
-  return Id;
-}
-
-static uint64_t computeStackId(const memprof::Frame &Frame) {
-  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
-}
-
-static void addCallStack(CallStackTrie &AllocTrie,
-                         const AllocationInfo *AllocInfo) {
-  SmallVector<uint64_t> StackIds;
-  for (const auto &StackFrame : AllocInfo->CallStack)
-    StackIds.push_back(computeStackId(StackFrame));
-  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
-                                AllocInfo->Info.getAllocCount(),
-                                AllocInfo->Info.getTotalLifetime());
-  AllocTrie.addCallStack(AllocType, StackIds);
-}
-
-// Helper to compare the InlinedCallStack computed from an instruction's debug
-// info to a list of Frames from profile data (either the allocation data or a
-// callsite). For callsites, the StartIndex to use in the Frame array may be
-// non-zero.
-static bool
-stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
-                                   ArrayRef<uint64_t> InlinedCallStack,
-                                   unsigned StartIndex = 0) {
-  auto StackFrame = ProfileCallStack.begin() + StartIndex;
-  auto InlCallStackIter = InlinedCallStack.begin();
-  for (; StackFrame != ProfileCallStack.end() &&
-         InlCallStackIter != InlinedCallStack.end();
-       ++StackFrame, ++InlCallStackIter) {
-    uint64_t StackId = computeStackId(*StackFrame);
-    if (StackId != *InlCallStackIter)
-      return false;
-  }
-  // Return true if we found and matched all stack ids from the call
-  // instruction.
-  return InlCallStackIter == InlinedCallStack.end();
-}
-
-bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) {
-  if (!MatchMemProf)
-    return true;
-
-  auto &Ctx = M->getContext();
-
-  auto FuncGUID = Function::getGUID(FuncInfo.FuncName);
-  Expected<memprof::MemProfRecord> MemProfResult =
-      PGOReader->getMemProfRecord(FuncGUID);
-  if (Error E = MemProfResult.takeError()) {
-    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
-      auto Err = IPE.get();
-      bool SkipWarning = false;
-      LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
-                        << FuncInfo.FuncName << ": ");
-      if (Err == instrprof_error::unknown_function) {
-        NumOfMemProfMissing++;
-        SkipWarning = !PGOWarnMissing;
-        LLVM_DEBUG(dbgs() << "unknown function");
-      } else if (Err == instrprof_error::hash_mismatch) {
-        SkipWarning =
-            NoPGOWarnMismatch ||
-            (NoPGOWarnMismatchComdatWeak &&
-             (F.hasComdat() ||
-              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
-        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
-      }
-
-      if (SkipWarning)
-        return;
-
-      std::string Msg =
-          (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") +
-           std::to_string(FuncInfo.FunctionHash))
-              .str();
-
-      Ctx.diagnose(
-          DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
-    });
-    return false;
-  }
-
-  // Build maps of the location hash to all profile data with that leaf location
-  // (allocation info and the callsites).
-  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
-  // For the callsites we need to record the index of the associated frame in
-  // the frame array (see comments below where the map entries are added).
-  std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
-      LocHashToCallSites;
-  const auto MemProfRec = std::move(MemProfResult.get());
-  for (auto &AI : MemProfRec.AllocSites) {
-    // Associate the allocation info with the leaf frame. The later matching
-    // code will match any inlined call sequences in the IR with a longer prefix
-    // of call stack frames.
-    uint64_t StackId = computeStackId(AI.CallStack[0]);
-    LocHashToAllocInfo[StackId].insert(&AI);
-  }
-  for (auto &CS : MemProfRec.CallSites) {
-    // Need to record all frames from leaf up to and including this function,
-    // as any of these may or may not have been inlined at this point.
-    unsigned Idx = 0;
-    for (auto &StackFrame : CS) {
-      uint64_t StackId = computeStackId(StackFrame);
-      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
-      // Once we find this function, we can stop recording.
-      if (StackFrame.Function == FuncGUID)
-        break;
-    }
-    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
-  }
-
-  auto GetOffset = [](const DILocation *DIL) {
-    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
-           0xffff;
-  };
-
-  // Now walk the instructions, looking up the associated profile data using
-  // dbug locations.
-  for (auto &BB : F) {
-    for (auto &I : BB) {
-      if (I.isDebugOrPseudoInst())
-        continue;
-      // We are only interested in calls (allocation or interior call stack
-      // context calls).
-      auto *CI = dyn_cast<CallBase>(&I);
-      if (!CI)
-        continue;
-      auto *CalledFunction = CI->getCalledFunction();
-      if (CalledFunction && CalledFunction->isIntrinsic())
-        continue;
-      // List of call stack ids computed from the location hashes on debug
-      // locations (leaf to inlined at root).
-      std::vector<uint64_t> InlinedCallStack;
-      // Was the leaf location found in one of the profile maps?
-      bool LeafFound = false;
-      // If leaf was found in a map, iterators pointing to its location in both
-      // of the maps. It might exist in neither, one, or both (the latter case
-      // can happen because we don't currently have discriminators to
-      // distinguish the case when a single line/col maps to both an allocation
-      // and another callsite).
-      std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
-          AllocInfoIter;
-      std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
-                                            unsigned>>>::iterator CallSitesIter;
-      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
-           DIL = DIL->getInlinedAt()) {
-        // Use C++ linkage name if possible. Need to compile with
-        // -fdebug-info-for-profiling to get linkage name.
-        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
-        if (Name.empty())
-          Name = DIL->getScope()->getSubprogram()->getName();
-        auto CalleeGUID = Function::getGUID(Name);
-        auto StackId =
-            computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
-        // LeafFound will only be false on the first iteration, since we either
-        // set it true or break out of the loop below.
-        if (!LeafFound) {
-          AllocInfoIter = LocHashToAllocInfo.find(StackId);
-          CallSitesIter = LocHashToCallSites.find(StackId);
-          // Check if the leaf is in one of the maps. If not, no need to look
-          // further at this call.
-          if (AllocInfoIter == LocHashToAllocInfo.end() &&
-              CallSitesIter == LocHashToCallSites.end())
-            break;
-          LeafFound = true;
-        }
-        InlinedCallStack.push_back(StackId);
-      }
-      // If leaf not in either of the maps, skip inst.
-      if (!LeafFound)
-        continue;
-
-      // First add !memprof metadata from allocation info, if we found the
-      // instruction's leaf location in that map, and if the rest of the
-      // instruction's locations match the prefix Frame locations on an
-      // allocation context with the same leaf.
-      if (AllocInfoIter != LocHashToAllocInfo.end()) {
-        // Only consider allocations via new, to reduce unnecessary metadata,
-        // since those are the only allocations that will be targeted initially.
-        if (!isNewLikeFn(CI, &FuncInfo.TLI))
-          continue;
-        // We may match this instruction's location list to multiple MIB
-        // contexts. Add them to a Trie specialized for trimming the contexts to
-        // the minimal needed to disambiguate contexts with unique behavior.
-        CallStackTrie AllocTrie;
-        for (auto *AllocInfo : AllocInfoIter->second) {
-          // Check the full inlined call stack against this one.
-          // If we found and thus matched all frames on the call, include
-          // this MIB.
-          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
-                                                 InlinedCallStack))
-            addCallStack(AllocTrie, AllocInfo);
-        }
-        // We might not have matched any to the full inlined call stack.
-        // But if we did, create and attach metadata, or a function attribute if
-        // all contexts have identical profiled behavior.
-        if (!AllocTrie.empty()) {
-          // MemprofMDAttached will be false if a function attribute was
-          // attached.
-          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
-          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
-          if (MemprofMDAttached) {
-            // Add callsite metadata for the instruction's location list so that
-            // it simpler later on to identify which part of the MIB contexts
-            // are from this particular instruction (including during inlining,
-            // when the callsite metdata will be updated appropriately).
-            // FIXME: can this be changed to strip out the matching stack
-            // context ids from the MIB contexts and not add any callsite
-            // metadata here to save space?
-            addCallsiteMetadata(I, InlinedCallStack, Ctx);
-          }
-        }
-        continue;
-      }
-
-      // Otherwise, add callsite metadata. If we reach here then we found the
-      // instruction's leaf location in the callsites map and not the allocation
-      // map.
-      assert(CallSitesIter != LocHashToCallSites.end());
-      for (auto CallStackIdx : CallSitesIter->second) {
-        // If we found and thus matched all frames on the call, create and
-        // attach call stack metadata.
-        if (stackFrameIncludesInlinedCallStack(
-                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
-          addCallsiteMetadata(I, InlinedCallStack, Ctx);
-          // Only need to find one with a matching call stack and add a single
-          // callsite metadata.
-          break;
-        }
-      }
-    }
-  }
-
-  return true;
-}
-
 void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
   handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
     auto &Ctx = M->getContext();
@@ -2262,7 +1996,7 @@
     return false;
 
   // TODO: might need to change the warning once the clang option is finalized.
-  if (!PGOReader->isIRLevelProfile() && !PGOReader->hasMemoryProfile()) {
+  if (!PGOReader->isIRLevelProfile()) {
     Ctx.diagnose(DiagnosticInfoPGOProfile(
         ProfileFileName.data(), "Not an IR level instrumentation profile"));
     return false;
@@ -2307,14 +2041,6 @@
     }
     PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
                     InstrumentFuncEntry, HasSingleByteCoverage);
-    // Read and match memprof first since we do this via debug info and can
-    // match even if there is an IR mismatch detected for regular PGO below.
-    if (PGOReader->hasMemoryProfile())
-      Func.readMemprof(PGOReader.get());
-
-    if (!PGOReader->isIRLevelProfile())
-      continue;
-
     if (HasSingleByteCoverage) {
       Func.populateCoverage(PGOReader.get());
       continue;
Index: llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
===================================================================
--- llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -18,9 +18,12 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryProfileInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/IRBuilder.h"
@@ -30,16 +33,29 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BLAKE3.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/HashBuilder.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <map>
+#include <set>
 
 using namespace llvm;
+using namespace llvm::memprof;
 
 #define DEBUG_TYPE "memprof"
 
+namespace llvm {
+extern cl::opt<bool> PGOWarnMissing;
+extern cl::opt<bool> NoPGOWarnMismatch;
+extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
+} // namespace llvm
+
 constexpr int LLVM_MEM_PROFILER_VERSION = 1;
 
 // Size of memory mapped to a single shadow location.
@@ -128,6 +144,7 @@
 STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
 STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
 STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
+STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
 
 namespace {
 
@@ -601,3 +618,299 @@
 
   return FunctionModified;
 }
+
+static void addCallsiteMetadata(Instruction &I,
+                                std::vector<uint64_t> &InlinedCallStack,
+                                LLVMContext &Ctx) {
+  I.setMetadata(LLVMContext::MD_callsite,
+                buildCallstackMetadata(InlinedCallStack, Ctx));
+}
+
+static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
+                               uint32_t Column) {
+  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
+      HashBuilder;
+  HashBuilder.add(Function, LineOffset, Column);
+  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+  uint64_t Id;
+  std::memcpy(&Id, Hash.data(), sizeof(Hash));
+  return Id;
+}
+
+static uint64_t computeStackId(const memprof::Frame &Frame) {
+  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
+}
+
+static void addCallStack(CallStackTrie &AllocTrie,
+                         const AllocationInfo *AllocInfo) {
+  SmallVector<uint64_t> StackIds;
+  for (const auto &StackFrame : AllocInfo->CallStack)
+    StackIds.push_back(computeStackId(StackFrame));
+  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
+                                AllocInfo->Info.getAllocCount(),
+                                AllocInfo->Info.getTotalLifetime());
+  AllocTrie.addCallStack(AllocType, StackIds);
+}
+
+// Helper to compare the InlinedCallStack computed from an instruction's debug
+// info to a list of Frames from profile data (either the allocation data or a
+// callsite). For callsites, the StartIndex to use in the Frame array may be
+// non-zero.
+static bool
+stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
+                                   ArrayRef<uint64_t> InlinedCallStack,
+                                   unsigned StartIndex = 0) {
+  auto StackFrame = ProfileCallStack.begin() + StartIndex;
+  auto InlCallStackIter = InlinedCallStack.begin();
+  for (; StackFrame != ProfileCallStack.end() &&
+         InlCallStackIter != InlinedCallStack.end();
+       ++StackFrame, ++InlCallStackIter) {
+    uint64_t StackId = computeStackId(*StackFrame);
+    if (StackId != *InlCallStackIter)
+      return false;
+  }
+  // Return true if we found and matched all stack ids from the call
+  // instruction.
+  return InlCallStackIter == InlinedCallStack.end();
+}
+
+static bool readMemprof(Module &M, Function &F,
+                        IndexedInstrProfReader *MemProfReader,
+                        const TargetLibraryInfo &TLI) {
+  auto &Ctx = M.getContext();
+
+  auto FuncName = getPGOFuncName(F);
+  auto FuncGUID = Function::getGUID(FuncName);
+  Expected<memprof::MemProfRecord> MemProfResult =
+      MemProfReader->getMemProfRecord(FuncGUID);
+  if (Error E = MemProfResult.takeError()) {
+    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+      auto Err = IPE.get();
+      bool SkipWarning = false;
+      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
+                        << ": ");
+      if (Err == instrprof_error::unknown_function) {
+        NumOfMemProfMissing++;
+        SkipWarning = !PGOWarnMissing;
+        LLVM_DEBUG(dbgs() << "unknown function");
+      } else if (Err == instrprof_error::hash_mismatch) {
+        SkipWarning =
+            NoPGOWarnMismatch ||
+            (NoPGOWarnMismatchComdatWeak &&
+             (F.hasComdat() ||
+              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
+      }
+
+      if (SkipWarning)
+        return;
+
+      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
+                         Twine(" Hash = ") + std::to_string(FuncGUID))
+                            .str();
+
+      Ctx.diagnose(
+          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
+    });
+    return false;
+  }
+
+  // Build maps of the location hash to all profile data with that leaf location
+  // (allocation info and the callsites).
+  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
+  // For the callsites we need to record the index of the associated frame in
+  // the frame array (see comments below where the map entries are added).
+  std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
+      LocHashToCallSites;
+  const auto MemProfRec = std::move(MemProfResult.get());
+  for (auto &AI : MemProfRec.AllocSites) {
+    // Associate the allocation info with the leaf frame. The later matching
+    // code will match any inlined call sequences in the IR with a longer prefix
+    // of call stack frames.
+    uint64_t StackId = computeStackId(AI.CallStack[0]);
+    LocHashToAllocInfo[StackId].insert(&AI);
+  }
+  for (auto &CS : MemProfRec.CallSites) {
+    // Need to record all frames from leaf up to and including this function,
+    // as any of these may or may not have been inlined at this point.
+    unsigned Idx = 0;
+    for (auto &StackFrame : CS) {
+      uint64_t StackId = computeStackId(StackFrame);
+      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
+      // Once we find this function, we can stop recording.
+      if (StackFrame.Function == FuncGUID)
+        break;
+    }
+    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
+  }
+
+  auto GetOffset = [](const DILocation *DIL) {
+    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
+           0xffff;
+  };
+
+  // Now walk the instructions, looking up the associated profile data using
+  // debug locations.
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      if (I.isDebugOrPseudoInst())
+        continue;
+      // We are only interested in calls (allocation or interior call stack
+      // context calls).
+      auto *CI = dyn_cast<CallBase>(&I);
+      if (!CI)
+        continue;
+      auto *CalledFunction = CI->getCalledFunction();
+      if (CalledFunction && CalledFunction->isIntrinsic())
+        continue;
+      // List of call stack ids computed from the location hashes on debug
+      // locations (leaf to inlined at root).
+      std::vector<uint64_t> InlinedCallStack;
+      // Was the leaf location found in one of the profile maps?
+      bool LeafFound = false;
+      // If leaf was found in a map, iterators pointing to its location in both
+      // of the maps. It might exist in neither, one, or both (the latter case
+      // can happen because we don't currently have discriminators to
+      // distinguish the case when a single line/col maps to both an allocation
+      // and another callsite).
+      std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
+          AllocInfoIter;
+      std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
+                                            unsigned>>>::iterator CallSitesIter;
+      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
+           DIL = DIL->getInlinedAt()) {
+        // Use C++ linkage name if possible. Need to compile with
+        // -fdebug-info-for-profiling to get linkage name.
+        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
+        if (Name.empty())
+          Name = DIL->getScope()->getSubprogram()->getName();
+        auto CalleeGUID = Function::getGUID(Name);
+        auto StackId =
+            computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
+        // LeafFound will only be false on the first iteration, since we either
+        // set it true or break out of the loop below.
+        if (!LeafFound) {
+          AllocInfoIter = LocHashToAllocInfo.find(StackId);
+          CallSitesIter = LocHashToCallSites.find(StackId);
+          // Check if the leaf is in one of the maps. If not, no need to look
+          // further at this call.
+          if (AllocInfoIter == LocHashToAllocInfo.end() &&
+              CallSitesIter == LocHashToCallSites.end())
+            break;
+          LeafFound = true;
+        }
+        InlinedCallStack.push_back(StackId);
+      }
+      // If leaf not in either of the maps, skip inst.
+      if (!LeafFound)
+        continue;
+
+      // First add !memprof metadata from allocation info, if we found the
+      // instruction's leaf location in that map, and if the rest of the
+      // instruction's locations match the prefix Frame locations on an
+      // allocation context with the same leaf.
+      if (AllocInfoIter != LocHashToAllocInfo.end()) {
+        // Only consider allocations via new, to reduce unnecessary metadata,
+        // since those are the only allocations that will be targeted initially.
+        if (!isNewLikeFn(CI, &TLI))
+          continue;
+        // We may match this instruction's location list to multiple MIB
+        // contexts. Add them to a Trie specialized for trimming the contexts to
+        // the minimal needed to disambiguate contexts with unique behavior.
+        CallStackTrie AllocTrie;
+        for (auto *AllocInfo : AllocInfoIter->second) {
+          // Check the full inlined call stack against this one.
+          // If we found and thus matched all frames on the call, include
+          // this MIB.
+          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
+                                                 InlinedCallStack))
+            addCallStack(AllocTrie, AllocInfo);
+        }
+        // We might not have matched any to the full inlined call stack.
+        // But if we did, create and attach metadata, or a function attribute if
+        // all contexts have identical profiled behavior.
+        if (!AllocTrie.empty()) {
+          // MemprofMDAttached will be false if a function attribute was
+          // attached.
+          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
+          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
+          if (MemprofMDAttached) {
+            // Add callsite metadata for the instruction's location list so that
+            // it is simpler later on to identify which part of the MIB contexts
+            // are from this particular instruction (including during inlining,
+            // when the callsite metadata will be updated appropriately).
+            // FIXME: can this be changed to strip out the matching stack
+            // context ids from the MIB contexts and not add any callsite
+            // metadata here to save space?
+            addCallsiteMetadata(I, InlinedCallStack, Ctx);
+          }
+        }
+        continue;
+      }
+
+      // Otherwise, add callsite metadata. If we reach here then we found the
+      // instruction's leaf location in the callsites map and not the allocation
+      // map.
+      assert(CallSitesIter != LocHashToCallSites.end());
+      for (auto CallStackIdx : CallSitesIter->second) {
+        // If we found and thus matched all frames on the call, create and
+        // attach call stack metadata.
+        if (stackFrameIncludesInlinedCallStack(
+                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
+          addCallsiteMetadata(I, InlinedCallStack, Ctx);
+          // Only need to find one with a matching call stack and add a single
+          // callsite metadata.
+          break;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
+                               IntrusiveRefCntPtr<vfs::FileSystem> FS)
+    : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
+  if (!FS)
+    this->FS = vfs::getRealFileSystem();
+}
+
+PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
+  LLVM_DEBUG(dbgs() << "Read in memory profile:");
+  auto &Ctx = M.getContext();
+  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
+  if (Error E = ReaderOrErr.takeError()) {
+    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
+      Ctx.diagnose(
+          DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
+    });
+    return PreservedAnalyses::all();
+  }
+
+  std::unique_ptr<IndexedInstrProfReader> MemProfReader =
+      std::move(ReaderOrErr.get());
+  if (!MemProfReader) {
+    Ctx.diagnose(DiagnosticInfoPGOProfile(
+        MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
+    return PreservedAnalyses::all();
+  }
+
+  if (!MemProfReader->hasMemoryProfile()) {
+    Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
+                                          "Not a memory profile"));
+    return PreservedAnalyses::all();
+  }
+
+  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+
+    const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+    readMemprof(M, F, MemProfReader.get(), TLI);
+  }
+
+  return PreservedAnalyses::none();
+}
Index: llvm/lib/Support/PGOOptions.cpp
===================================================================
--- llvm/lib/Support/PGOOptions.cpp
+++ llvm/lib/Support/PGOOptions.cpp
@@ -13,12 +13,13 @@
 
 PGOOptions::PGOOptions(std::string ProfileFile, std::string CSProfileGenFile,
                        std::string ProfileRemappingFile,
+                       std::string MemoryProfile,
                        IntrusiveRefCntPtr<vfs::FileSystem> FS, PGOAction Action,
                        CSPGOAction CSAction, bool DebugInfoForProfiling,
                        bool PseudoProbeForProfiling)
     : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
-      ProfileRemappingFile(ProfileRemappingFile), Action(Action),
-      CSAction(CSAction),
+      ProfileRemappingFile(ProfileRemappingFile), MemoryProfile(MemoryProfile),
+      Action(Action), CSAction(CSAction),
       DebugInfoForProfiling(DebugInfoForProfiling ||
                             (Action == SampleUse && !PseudoProbeForProfiling)),
       PseudoProbeForProfiling(PseudoProbeForProfiling), FS(std::move(FS)) {
@@ -36,13 +37,18 @@
   // a profile.
   assert(this->CSAction != CSIRUse || this->Action == IRUse);
 
-  // If neither Action nor CSAction, DebugInfoForProfiling or
-  // PseudoProbeForProfiling needs to be true.
+  // Cannot optimize with MemProf profile during IR instrumentation.
+  assert(this->MemoryProfile.empty() || this->Action != PGOOptions::IRInstr);
+
+  // If neither Action nor CSAction nor MemoryProfile are set,
+  // DebugInfoForProfiling or PseudoProbeForProfiling needs to be true.
   assert(this->Action != NoAction || this->CSAction != NoCSAction ||
-         this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
+         !this->MemoryProfile.empty() || this->DebugInfoForProfiling ||
+         this->PseudoProbeForProfiling);
 
   // If we need to use the profile, the VFS cannot be nullptr.
-  assert(this->FS || !(this->Action == IRUse || this->CSAction == CSIRUse));
+  assert(this->FS || !(this->Action == IRUse || this->CSAction == CSIRUse ||
+                       !this->MemoryProfile.empty()));
 }
 
 PGOOptions::PGOOptions(const PGOOptions &) = default;
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -183,6 +183,13 @@
                         },
                         parseEmbedBitcodePassOptions,
                         "thinlto;emit-summary")
+MODULE_PASS_WITH_PARAMS("memprof-use",
+                         "MemProfUsePass",
+                        [](std::string Opts) {
+                          return MemProfUsePass(Opts);
+                        },
+                        parseMemProfUsePassOptions,
+                        "profile-filename=S")
 #undef MODULE_PASS_WITH_PARAMS
 
 #ifndef CGSCC_ANALYSIS
Index: llvm/lib/Passes/PassBuilderPipelines.cpp
===================================================================
--- llvm/lib/Passes/PassBuilderPipelines.cpp
+++ llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1102,6 +1102,10 @@
       PGOOpt->CSAction == PGOOptions::CSIRInstr)
     MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
 
+  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+      !PGOOpt->MemoryProfile.empty())
+    MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
+
   // Synthesize function entry counts for non-PGO compilation.
   if (EnableSyntheticCounts && !PGOOpt)
     MPM.addPass(SyntheticCountsPropagation());
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -1071,6 +1071,23 @@
                                "MemorySSAPrinterPass");
 }
 
+Expected<std::string> parseMemProfUsePassOptions(StringRef Params) {
+  std::string Result;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+
+    if (ParamName.consume_front("profile-filename=")) {
+      Result = ParamName.str();
+    } else {
+      return make_error<StringError>(
+          formatv("invalid MemProfUse pass parameter '{0}' ", ParamName).str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Result;
+}
+
 } // namespace
 
 /// Tests whether a pass name starts with a valid prefix for a default pipeline
Index: llvm/lib/LTO/LTOBackend.cpp
===================================================================
--- llvm/lib/LTO/LTOBackend.cpp
+++ llvm/lib/LTO/LTOBackend.cpp
@@ -236,20 +236,21 @@
   auto FS = vfs::getRealFileSystem();
   std::optional<PGOOptions> PGOOpt;
   if (!Conf.SampleProfile.empty())
-    PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping, FS,
-                        PGOOptions::SampleUse, PGOOptions::NoCSAction, true);
+    PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping,
+                        /*MemoryProfile=*/"", FS, PGOOptions::SampleUse,
+                        PGOOptions::NoCSAction, true);
   else if (Conf.RunCSIRInstr) {
-    PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, FS,
-                        PGOOptions::IRUse, PGOOptions::CSIRInstr,
-                        Conf.AddFSDiscriminator);
+    PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping,
+                        /*MemoryProfile=*/"", FS, PGOOptions::IRUse,
+                        PGOOptions::CSIRInstr, Conf.AddFSDiscriminator);
   } else if (!Conf.CSIRProfile.empty()) {
-    PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, FS,
-                        PGOOptions::IRUse, PGOOptions::CSIRUse,
-                        Conf.AddFSDiscriminator);
+    PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
+                        /*MemoryProfile=*/"", FS, PGOOptions::IRUse,
+                        PGOOptions::CSIRUse, Conf.AddFSDiscriminator);
     NoPGOWarnMismatch = !Conf.PGOWarnMismatch;
   } else if (Conf.AddFSDiscriminator) {
-    PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction,
-                        PGOOptions::NoCSAction, true);
+    PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
+                        PGOOptions::NoAction, PGOOptions::NoCSAction, true);
   }
   TM->setPGOOption(PGOOpt);
 
Index: llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
===================================================================
--- llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -12,6 +12,7 @@
 #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H
 #define LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H
 
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
@@ -20,6 +21,10 @@
 class Module;
 class ModulePass;
 
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
 /// Public interface to the memory profiler pass for instrumenting code to
 /// profile memory accesses.
 ///
@@ -43,6 +48,17 @@
   static bool isRequired() { return true; }
 };
 
+class MemProfUsePass : public PassInfoMixin<MemProfUsePass> {
+public:
+  explicit MemProfUsePass(std::string MemoryProfileFile,
+                          IntrusiveRefCntPtr<vfs::FileSystem> FS = nullptr);
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+  std::string MemoryProfileFileName;
+  IntrusiveRefCntPtr<vfs::FileSystem> FS;
+};
+
 } // namespace llvm
 
 #endif
Index: llvm/include/llvm/Support/PGOOptions.h
===================================================================
--- llvm/include/llvm/Support/PGOOptions.h
+++ llvm/include/llvm/Support/PGOOptions.h
@@ -28,7 +28,7 @@
   enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
   enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
   PGOOptions(std::string ProfileFile, std::string CSProfileGenFile,
-             std::string ProfileRemappingFile,
+             std::string ProfileRemappingFile, std::string MemoryProfile,
              IntrusiveRefCntPtr<vfs::FileSystem> FS,
              PGOAction Action = NoAction, CSPGOAction CSAction = NoCSAction,
              bool DebugInfoForProfiling = false,
@@ -40,6 +40,7 @@
   std::string ProfileFile;
   std::string CSProfileGenFile;
   std::string ProfileRemappingFile;
+  std::string MemoryProfile;
   PGOAction Action;
   CSPGOAction CSAction;
   bool DebugInfoForProfiling;
Index: clang/test/Driver/fmemprof.cpp
===================================================================
--- clang/test/Driver/fmemprof.cpp
+++ clang/test/Driver/fmemprof.cpp
@@ -8,3 +8,12 @@
 // DIR: ld{{.*}}libclang_rt.memprof{{.*}}libclang_rt.memprof_cxx
 // OFF-NOT: "-fmemory-profile"
 // OFF-NOT: libclang_rt.memprof
+
+// RUN: %clangxx -target x86_64-linux-gnu -fmemory-profile-use=foo %s -### 2>&1 | FileCheck %s --check-prefix=USE
+// USE: "-cc1" {{.*}} "-fmemory-profile-use=foo"
+
+// RUN: %clangxx -target x86_64-linux-gnu -fmemory-profile -fmemory-profile-use=foo %s -### 2>&1 | FileCheck %s --check-prefix=CONFLICTWITHMEMPROFINSTR
+// CONFLICTWITHMEMPROFINSTR: error: invalid argument '-fmemory-profile-use=foo' not allowed with '-fmemory-profile'
+
+// RUN: %clangxx -target x86_64-linux-gnu -fprofile-generate -fmemory-profile-use=foo %s -### 2>&1 | FileCheck %s --check-prefix=CONFLICTWITHPGOINSTR
+// CONFLICTWITHPGOINSTR: error: invalid argument '-fmemory-profile-use=foo' not allowed with '-fprofile-generate'
Index: clang/test/CodeGen/memprof.cpp
===================================================================
--- clang/test/CodeGen/memprof.cpp
+++ clang/test/CodeGen/memprof.cpp
@@ -16,8 +16,8 @@
 
 // Profile use:
 // Ensure Pass PGOInstrumentationUse is invoked with the memprof-only profile.
-// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.memprofdata %s -fdebug-pass-manager  -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=USE
-// USE: Running pass: PGOInstrumentationUse on [module]
+// RUN: %clang_cc1 -O2 -fmemory-profile-use=%t.memprofdata %s -fdebug-pass-manager  -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=USE
+// USE: Running pass: MemProfUsePass on [module]
 
 char *foo() {
   return new char[10];
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4946,6 +4946,18 @@
       !MemProfArg->getOption().matches(options::OPT_fno_memory_profile))
     MemProfArg->render(Args, CmdArgs);
 
+  if (auto *MemProfUseArg =
+          Args.getLastArg(options::OPT_fmemory_profile_use_EQ)) {
+    if (MemProfArg)
+      D.Diag(diag::err_drv_argument_not_allowed_with)
+          << MemProfUseArg->getAsString(Args) << MemProfArg->getAsString(Args);
+    if (auto *PGOInstrArg = Args.getLastArg(options::OPT_fprofile_generate,
+                                            options::OPT_fprofile_generate_EQ))
+      D.Diag(diag::err_drv_argument_not_allowed_with)
+          << MemProfUseArg->getAsString(Args) << PGOInstrArg->getAsString(Args);
+    MemProfUseArg->render(Args, CmdArgs);
+  }
+
   // Embed-bitcode option.
   // Only white-listed flags below are allowed to be embedded.
   if (C.getDriver().embedBitcodeInObject() && !IsUsingLTO &&
Index: clang/lib/CodeGen/BackendUtil.cpp
===================================================================
--- clang/lib/CodeGen/BackendUtil.cpp
+++ clang/lib/CodeGen/BackendUtil.cpp
@@ -762,31 +762,37 @@
     PGOOpt = PGOOptions(
         CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName()
                                                : CodeGenOpts.InstrProfileOutput,
-        "", "", nullptr, PGOOptions::IRInstr, PGOOptions::NoCSAction,
-        CodeGenOpts.DebugInfoForProfiling);
+        "", "", CodeGenOpts.MemoryProfileUsePath, nullptr, PGOOptions::IRInstr,
+        PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling);
   else if (CodeGenOpts.hasProfileIRUse()) {
     // -fprofile-use.
     auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse
                                                     : PGOOptions::NoCSAction;
-    PGOOpt =
-        PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "",
-                   CodeGenOpts.ProfileRemappingFile, VFS, PGOOptions::IRUse,
-                   CSAction, CodeGenOpts.DebugInfoForProfiling);
+    PGOOpt = PGOOptions(
+        CodeGenOpts.ProfileInstrumentUsePath, "",
+        CodeGenOpts.ProfileRemappingFile, CodeGenOpts.MemoryProfileUsePath, VFS,
+        PGOOptions::IRUse, CSAction, CodeGenOpts.DebugInfoForProfiling);
   } else if (!CodeGenOpts.SampleProfileFile.empty())
     // -fprofile-sample-use
     PGOOpt = PGOOptions(
         CodeGenOpts.SampleProfileFile, "", CodeGenOpts.ProfileRemappingFile,
-        VFS, PGOOptions::SampleUse, PGOOptions::NoCSAction,
-        CodeGenOpts.DebugInfoForProfiling, CodeGenOpts.PseudoProbeForProfiling);
+        CodeGenOpts.MemoryProfileUsePath, VFS, PGOOptions::SampleUse,
+        PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling,
+        CodeGenOpts.PseudoProbeForProfiling);
+  else if (!CodeGenOpts.MemoryProfileUsePath.empty())
+    // -fmemory-profile-use (without any of the above options)
+    PGOOpt = PGOOptions("", "", "", CodeGenOpts.MemoryProfileUsePath, VFS,
+                        PGOOptions::NoAction, PGOOptions::NoCSAction,
+                        CodeGenOpts.DebugInfoForProfiling);
   else if (CodeGenOpts.PseudoProbeForProfiling)
     // -fpseudo-probe-for-profiling
-    PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction,
-                        PGOOptions::NoCSAction,
+    PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
+                        PGOOptions::NoAction, PGOOptions::NoCSAction,
                         CodeGenOpts.DebugInfoForProfiling, true);
   else if (CodeGenOpts.DebugInfoForProfiling)
     // -fdebug-info-for-profiling
-    PGOOpt = PGOOptions("", "", "", nullptr, PGOOptions::NoAction,
-                        PGOOptions::NoCSAction, true);
+    PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
+                        PGOOptions::NoAction, PGOOptions::NoCSAction, true);
 
   // Check to see if we want to generate a CS profile.
   if (CodeGenOpts.hasProfileCSIRInstr()) {
@@ -808,8 +814,8 @@
                      CodeGenOpts.InstrProfileOutput.empty()
                          ? getDefaultProfileGenName()
                          : CodeGenOpts.InstrProfileOutput,
-                     "", nullptr, PGOOptions::NoAction, PGOOptions::CSIRInstr,
-                     CodeGenOpts.DebugInfoForProfiling);
+                     "", /*MemoryProfile=*/"", nullptr, PGOOptions::NoAction,
+                     PGOOptions::CSIRInstr, CodeGenOpts.DebugInfoForProfiling);
   }
   if (TM)
     TM->setPGOOption(PGOOpt);
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1772,6 +1772,10 @@
 def fmemory_profile_EQ : Joined<["-"], "fmemory-profile=">,
     Group<f_Group>, Flags<[CC1Option]>, MetaVarName<"<directory>">,
     HelpText<"Enable heap memory profiling and dump results into <directory>">;
+def fmemory_profile_use_EQ : Joined<["-"], "fmemory-profile-use=">,
+    Group<f_Group>, Flags<[CC1Option, CoreOption]>, MetaVarName<"<pathname>">,
+    HelpText<"Use memory profile for profile-guided memory optimization">,
+    MarshallingInfoString<CodeGenOpts<"MemoryProfileUsePath">>;
 
 // Begin sanitizer flags. These should all be core options exposed in all driver
 // modes.
Index: clang/include/clang/Basic/CodeGenOptions.h
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.h
+++ clang/include/clang/Basic/CodeGenOptions.h
@@ -282,6 +282,9 @@
   /// Name of the profile file to use as output for with -fmemory-profile.
   std::string MemoryProfileOutput;
 
+  /// Name of the profile file to use as input for -fmemory-profile-use.
+  std::string MemoryProfileUsePath;
+
   /// Name of the profile file to use as input for -fprofile-instr-use
   std::string ProfileInstrumentUsePath;
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to