https://github.com/SharonXSharon updated 
https://github.com/llvm/llvm-project/pull/182224

>From 9012fe22681b3f1fa1dc9bd866f4ed3d7fc1a30f Mon Sep 17 00:00:00 2001
From: Sharon Xu <[email protected]>
Date: Wed, 18 Feb 2026 13:49:49 -0800
Subject: [PATCH] [memprof] Add Darwin platform support, fine-granularity
 shadow, and V6 raw profile format

Add memprof support for Darwin/Apple platforms, decouple shadow memory
granularity from histogram collection, and extend the raw profile format
with a memory block address section.

Darwin platform support:
- Add `ARM64` to `ALL_MEMPROF_SUPPORTED_ARCH` in
  `AllSupportedArchDefs.cmake`; set `MEMPROF_SUPPORTED_OS` to `osx` and
  add Darwin to the OS match in `config-ix.cmake`.
- Restructure `CMakeLists.txt` following the asan pattern: shared-only
  runtime on Apple, static+shared on other platforms.
- Add `memprof_mac.cpp` with `FindDynamicShadowStart()`,
  `InitializePlatformInterceptors()`,
  `InitializePlatformExceptionHandlers()`, and `MemprofDlSymNext()`.
- Add `memprof_malloc_mac.cpp` using `COMMON_MALLOC_*` macros from
  `sanitizer_malloc_mac.inc` for Darwin malloc zone interception.
- Update `memprof_interceptors.h` to no-op `MEMPROF_INTERCEPT_FUNC` on
  Apple (DYLD interposition handles it).
- Update `memprof_interceptors.cpp` with `SANITIZER_APPLE` paths in
  `COMMON_INTERCEPTOR_ENTER`, `strcpy`, and `index`, and restrict the
  `__strdup` interceptor to `SANITIZER_LINUX`.
- Add `aarch64` support to `memprof_allocator.h`; in
  `memprof_allocator.cpp`, call `sched_getcpu()` only on Linux and fall
  back to `_os_cpu_number` (resolved via `dlsym`) on Apple, or `-1`
  otherwise.
- Change `memprof_linux.cpp` and `memprof_malloc_linux.cpp` from
  `#error` to `#if SANITIZER_LINUX` guards.
- Update `lit.cfg.py` for Darwin test support (`DYLD_LIBRARY_PATH`,
  dylib naming, `-ldl` conditionalization).
- Add `memprof_malloc_mac.cpp` to `isRuntimePath()` in
  `MemProfReader.cpp`.
- Add Darwin-specific tests and platform guards for Linux-only tests
  (`stress_dtls.c`, `free_sized.cpp`).

Shadow memory granularity refactoring:
- Add `-memprof-fine-granularity` flag that enables 8-byte shadow
  granularity (same as `-memprof-histogram`) without collecting
  per-bucket histogram arrays.
- Add `__memprof_fine_granularity` weak global variable for
  compile-to-runtime communication.
- Update `ShadowMapping`, `instrumentAddress`, and `initializeCallbacks`
  to use fine granularity when either `ClHistogram` or
  `ClFineGranularity` is set.
- Add `UseFineGranularity()` helper in the runtime; update `ClearShadow`
  to use the histogram-style shadow mapping whenever fine granularity is
  in effect, and `CreateNewMIB` for three-way dispatch (histogram,
  fine-granularity, standard).

Raw profile format V6:
- Bump `MEMPROF_RAW_VERSION` to 6; add 6 to supported versions.
- Add `MemAddressOffset` field to `Header` in `MemProfData.inc` (both
  LLVM and compiler-rt copies).
- Add memory block address section: `NumEntries` (u64) followed by
  allocation addresses (u64 each).
- Update `SerializeToRawProfile` to accept and serialize memory block
  addresses; collect live block addresses in `InsertLiveBlocks`.
- Add `readMemBlockAddresses()` and `readMemInfoBlocksV6()` to
  `MemProfReader.cpp`; store addresses in `RawMemProfReader`.
- Update `rawprofile.cpp` unit test for V6 header layout.
---
 clang/lib/Driver/ToolChains/Darwin.cpp        |   5 +
 .../cmake/Modules/AllSupportedArchDefs.cmake  |   2 +-
 compiler-rt/cmake/config-ix.cmake             |   3 +-
 compiler-rt/include/profile/MemProfData.inc   |   6 +-
 compiler-rt/lib/memprof/CMakeLists.txt        | 178 ++++++++++--------
 compiler-rt/lib/memprof/memprof_allocator.cpp |  97 +++++++++-
 compiler-rt/lib/memprof/memprof_allocator.h   |   7 +-
 .../lib/memprof/memprof_interceptors.cpp      |  29 ++-
 .../lib/memprof/memprof_interceptors.h        |   9 +-
 .../lib/memprof/memprof_interface_internal.h  |   3 +
 compiler-rt/lib/memprof/memprof_internal.h    |   5 +-
 compiler-rt/lib/memprof/memprof_linux.cpp     |   6 +-
 compiler-rt/lib/memprof/memprof_mac.cpp       |  54 ++++++
 .../lib/memprof/memprof_malloc_linux.cpp      |   6 +-
 .../lib/memprof/memprof_malloc_mac.cpp        |  77 ++++++++
 .../lib/memprof/memprof_rawprofile.cpp        |  50 ++++-
 compiler-rt/lib/memprof/memprof_rawprofile.h  |   4 +
 compiler-rt/lib/memprof/memprof_rtl.cpp       |   3 +
 compiler-rt/lib/memprof/tests/CMakeLists.txt  |  31 ++-
 compiler-rt/lib/memprof/tests/rawprofile.cpp  |  91 +++++----
 compiler-rt/lib/memprof/weak_symbols.txt      |   4 +-
 compiler-rt/test/memprof/CMakeLists.txt       |   2 +-
 .../memprof/TestCases/Darwin/lit.local.cfg.py |  10 +
 .../memprof/TestCases/Darwin/malloc_zone.c    |  25 +++
 .../TestCases/Darwin/malloc_zone_allocators.c |  36 ++++
 .../memprof/TestCases/Darwin/new_delete.cpp   |  24 +++
 .../memprof/TestCases/Darwin/raw_profile.cpp  |  19 ++
 .../test/memprof/TestCases/free_sized.cpp     |   1 +
 .../test/memprof/TestCases/stress_dtls.c      |   1 +
 .../test/memprof/Unit/lit.site.cfg.py.in      |   5 +-
 compiler-rt/test/memprof/lit.cfg.py           |  36 ++--
 llvm/include/llvm/ProfileData/MemProfData.inc |   6 +-
 llvm/include/llvm/ProfileData/MemProfReader.h |   9 +
 llvm/lib/ProfileData/MemProfReader.cpp        |  31 +++
 .../MemProfInstrumentation.cpp                |  52 ++++-
 35 files changed, 749 insertions(+), 178 deletions(-)
 create mode 100644 compiler-rt/lib/memprof/memprof_mac.cpp
 create mode 100644 compiler-rt/lib/memprof/memprof_malloc_mac.cpp
 create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/lit.local.cfg.py
 create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/malloc_zone.c
 create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/malloc_zone_allocators.c
 create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/new_delete.cpp
 create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/raw_profile.cpp

diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 74fcb10c0be22..b172c83038ae4 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -1669,6 +1669,11 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList 
&Args,
       AddLinkRuntimeLib(Args, CmdArgs, "stats_client", RLO_AlwaysLink);
       AddLinkSanitizerLibArgs(Args, CmdArgs, "stats");
     }
+    if (Sanitize.needsMemProfRt()) {
+      assert(Sanitize.needsSharedRt() &&
+             "Static sanitizer runtimes not supported");
+      AddLinkSanitizerLibArgs(Args, CmdArgs, "memprof");
+    }
   }
 
   if (Sanitize.needsMemProfRt())
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index c2de0d0f652e8..c5642bdf3ca58 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -89,7 +89,7 @@ else()
 endif()
 set(ALL_NSAN_SUPPORTED_ARCH ${X86_64})
 set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64} ${RISCV64})
-set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64})
+set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64} ${ARM64})
 set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} 
${PPC64}
     ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON}
     ${RISCV32} ${RISCV64} ${LOONGARCH64} ${WASM32})
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 1f82ff3cf7531..330663935d30c 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -463,6 +463,7 @@ if(APPLE)
 
   set(SANITIZER_COMMON_SUPPORTED_OS osx)
   set(PROFILE_SUPPORTED_OS osx)
+  set(MEMPROF_SUPPORTED_OS osx)
   set(TSAN_SUPPORTED_OS osx)
   set(TYSAN_SUPPORTED_OS osx)
   set(XRAY_SUPPORTED_OS osx)
@@ -835,7 +836,7 @@ else()
 endif()
 
 if (COMPILER_RT_HAS_SANITIZER_COMMON AND MEMPROF_SUPPORTED_ARCH AND
-    OS_NAME MATCHES "Linux")
+    OS_NAME MATCHES "Darwin|Linux")
   set(COMPILER_RT_HAS_MEMPROF TRUE)
 else()
   set(COMPILER_RT_HAS_MEMPROF FALSE)
diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc
index 26badddae6f3a..5be4c0cc2802a 100644
--- a/compiler-rt/include/profile/MemProfData.inc
+++ b/compiler-rt/include/profile/MemProfData.inc
@@ -33,10 +33,10 @@
    (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | 
(uint64_t)129)
 
 // The version number of the raw binary format.
-#define MEMPROF_RAW_VERSION 5ULL
+#define MEMPROF_RAW_VERSION 6ULL
 
 // Currently supported versions.
-#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL}
+#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL, 6ULL}
 
 #define MEMPROF_V3_MIB_SIZE 132ULL;
 
@@ -52,6 +52,8 @@ PACKED(struct Header {
   uint64_t SegmentOffset;
   uint64_t MIBOffset;
   uint64_t StackOffset;
+  // Added in V6: offset to memory block address section.
+  uint64_t MemAddressOffset;
 });
 
 // A struct describing the information necessary to describe a /proc/maps
diff --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt
index e6d99daca6ee7..30d8fa6885b45 100644
--- a/compiler-rt/lib/memprof/CMakeLists.txt
+++ b/compiler-rt/lib/memprof/CMakeLists.txt
@@ -7,7 +7,9 @@ set(MEMPROF_SOURCES
   memprof_interceptors.cpp
   memprof_interceptors_memintrinsics.cpp
   memprof_linux.cpp
+  memprof_mac.cpp
   memprof_malloc_linux.cpp
+  memprof_malloc_mac.cpp
   memprof_mibmap.cpp
   memprof_posix.cpp
   memprof_rawprofile.cpp
@@ -78,7 +80,7 @@ append_list_if(COMPILER_RT_HAS_LIBLOG log 
MEMPROF_DYNAMIC_LIBS)
 # Compile MemProf sources into an object library.
 
 add_compiler_rt_object_libraries(RTMemprof_dynamic
-  OS ${SANITIZER_COMMON_SUPPORTED_OS}
+  OS ${MEMPROF_SUPPORTED_OS}
   ARCHS ${MEMPROF_SUPPORTED_ARCH}
   SOURCES ${MEMPROF_SOURCES} ${MEMPROF_CXX_SOURCES}
   ADDITIONAL_HEADERS ${MEMPROF_HEADERS}
@@ -86,6 +88,7 @@ add_compiler_rt_object_libraries(RTMemprof_dynamic
   DEFS ${MEMPROF_DYNAMIC_DEFINITIONS}
   DEPS ${MEMPROF_DEPS})
 
+if(NOT APPLE)
 add_compiler_rt_object_libraries(RTMemprof
   ARCHS ${MEMPROF_SUPPORTED_ARCH}
   SOURCES ${MEMPROF_SOURCES}
@@ -115,94 +118,115 @@ 
add_compiler_rt_object_libraries(RTMemprof_dynamic_version_script_dummy
   CFLAGS ${MEMPROF_DYNAMIC_CFLAGS}
   DEFS ${MEMPROF_DYNAMIC_DEFINITIONS}
   DEPS ${MEMPROF_DEPS})
+endif()
 
 # Build MemProf runtimes shipped with Clang.
 add_compiler_rt_component(memprof)
 
-# Build separate libraries for each target.
-
-set(MEMPROF_COMMON_RUNTIME_OBJECT_LIBS
-  RTInterception
-  RTSanitizerCommon
-  RTSanitizerCommonLibc
-  RTSanitizerCommonCoverage
-  RTSanitizerCommonSymbolizer
-  # FIXME: hangs.
-  # RTSanitizerCommonSymbolizerInternal
-)
-
-add_compiler_rt_runtime(clang_rt.memprof
-  STATIC
-  ARCHS ${MEMPROF_SUPPORTED_ARCH}
-  OBJECT_LIBS RTMemprof_preinit
-              RTMemprof
-              ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS}
-  CFLAGS ${MEMPROF_CFLAGS}
-  DEFS ${MEMPROF_COMMON_DEFINITIONS}
-  PARENT_TARGET memprof)
-
-add_compiler_rt_runtime(clang_rt.memprof_cxx
-  STATIC
-  ARCHS ${MEMPROF_SUPPORTED_ARCH}
-  OBJECT_LIBS RTMemprof_cxx
-  CFLAGS ${MEMPROF_CFLAGS}
-  DEFS ${MEMPROF_COMMON_DEFINITIONS}
-  PARENT_TARGET memprof)
-
-add_compiler_rt_runtime(clang_rt.memprof-preinit
-  STATIC
-  ARCHS ${MEMPROF_SUPPORTED_ARCH}
-  OBJECT_LIBS RTMemprof_preinit
-  CFLAGS ${MEMPROF_CFLAGS}
-  DEFS ${MEMPROF_COMMON_DEFINITIONS}
-  PARENT_TARGET memprof)
-
-foreach(arch ${MEMPROF_SUPPORTED_ARCH})
-  if (UNIX)
-    add_sanitizer_rt_version_list(clang_rt.memprof-dynamic-${arch}
-                                  LIBS clang_rt.memprof-${arch} 
clang_rt.memprof_cxx-${arch}
-                                  EXTRA memprof.syms.extra)
-    set(VERSION_SCRIPT_FLAG
-         
-Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers)
-    set_property(SOURCE
-      ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp
-      APPEND PROPERTY
-      OBJECT_DEPENDS 
${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers)
-  else()
-    set(VERSION_SCRIPT_FLAG)
-  endif()
-
-  set(MEMPROF_DYNAMIC_WEAK_INTERCEPTION)
+if(APPLE)
+  add_weak_symbols("memprof" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
 
   add_compiler_rt_runtime(clang_rt.memprof
     SHARED
-    ARCHS ${arch}
-    OBJECT_LIBS ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS}
-            RTMemprof_dynamic
-            # The only purpose of RTMemprof_dynamic_version_script_dummy is to
-            # carry a dependency of the shared runtime on the version script.
-            # Replacing it with a straightforward
-            # add_dependencies(clang_rt.memprof-dynamic-${arch} 
clang_rt.memprof-dynamic-${arch}-version-list)
-            # generates an order-only dependency in ninja.
-            RTMemprof_dynamic_version_script_dummy
-            ${MEMPROF_DYNAMIC_WEAK_INTERCEPTION}
+    OS ${MEMPROF_SUPPORTED_OS}
+    ARCHS ${MEMPROF_SUPPORTED_ARCH}
+    OBJECT_LIBS RTMemprof_dynamic
+                RTInterception
+                RTSanitizerCommon
+                RTSanitizerCommonLibc
+                RTSanitizerCommonCoverage
+                RTSanitizerCommonSymbolizer
     CFLAGS ${MEMPROF_DYNAMIC_CFLAGS}
-    LINK_FLAGS ${MEMPROF_DYNAMIC_LINK_FLAGS}
-              ${VERSION_SCRIPT_FLAG}
-    LINK_LIBS ${MEMPROF_DYNAMIC_LIBS}
+    LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
     DEFS ${MEMPROF_DYNAMIC_DEFINITIONS}
     PARENT_TARGET memprof)
+else()
+  # Build separate libraries for each target.
+
+  set(MEMPROF_COMMON_RUNTIME_OBJECT_LIBS
+    RTInterception
+    RTSanitizerCommon
+    RTSanitizerCommonLibc
+    RTSanitizerCommonCoverage
+    RTSanitizerCommonSymbolizer
+    # FIXME: hangs.
+    # RTSanitizerCommonSymbolizerInternal
+  )
+
+  add_compiler_rt_runtime(clang_rt.memprof
+    STATIC
+    ARCHS ${MEMPROF_SUPPORTED_ARCH}
+    OBJECT_LIBS RTMemprof_preinit
+                RTMemprof
+                ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS}
+    CFLAGS ${MEMPROF_CFLAGS}
+    DEFS ${MEMPROF_COMMON_DEFINITIONS}
+    PARENT_TARGET memprof)
 
-  if (SANITIZER_USE_SYMBOLS)
-    add_sanitizer_rt_symbols(clang_rt.memprof_cxx
-      ARCHS ${arch})
-    add_dependencies(memprof clang_rt.memprof_cxx-${arch}-symbols)
-    add_sanitizer_rt_symbols(clang_rt.memprof
+  add_compiler_rt_runtime(clang_rt.memprof_cxx
+    STATIC
+    ARCHS ${MEMPROF_SUPPORTED_ARCH}
+    OBJECT_LIBS RTMemprof_cxx
+    CFLAGS ${MEMPROF_CFLAGS}
+    DEFS ${MEMPROF_COMMON_DEFINITIONS}
+    PARENT_TARGET memprof)
+
+  add_compiler_rt_runtime(clang_rt.memprof-preinit
+    STATIC
+    ARCHS ${MEMPROF_SUPPORTED_ARCH}
+    OBJECT_LIBS RTMemprof_preinit
+    CFLAGS ${MEMPROF_CFLAGS}
+    DEFS ${MEMPROF_COMMON_DEFINITIONS}
+    PARENT_TARGET memprof)
+
+  foreach(arch ${MEMPROF_SUPPORTED_ARCH})
+    if (UNIX)
+      add_sanitizer_rt_version_list(clang_rt.memprof-dynamic-${arch}
+                                    LIBS clang_rt.memprof-${arch} 
clang_rt.memprof_cxx-${arch}
+                                    EXTRA memprof.syms.extra)
+      set(VERSION_SCRIPT_FLAG
+           
-Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers)
+      set_property(SOURCE
+        ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp
+        APPEND PROPERTY
+        OBJECT_DEPENDS 
${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers)
+    else()
+      set(VERSION_SCRIPT_FLAG)
+    endif()
+
+    set(MEMPROF_DYNAMIC_WEAK_INTERCEPTION)
+
+    add_compiler_rt_runtime(clang_rt.memprof
+      SHARED
       ARCHS ${arch}
-      EXTRA memprof.syms.extra)
-    add_dependencies(memprof clang_rt.memprof-${arch}-symbols)
-  endif()
-endforeach()
+      OBJECT_LIBS ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS}
+              RTMemprof_dynamic
+              # The only purpose of RTMemprof_dynamic_version_script_dummy is 
to
+              # carry a dependency of the shared runtime on the version script.
+              # Replacing it with a straightforward
+              # add_dependencies(clang_rt.memprof-dynamic-${arch} 
clang_rt.memprof-dynamic-${arch}-version-list)
+              # generates an order-only dependency in ninja.
+              RTMemprof_dynamic_version_script_dummy
+              ${MEMPROF_DYNAMIC_WEAK_INTERCEPTION}
+      CFLAGS ${MEMPROF_DYNAMIC_CFLAGS}
+      LINK_FLAGS ${MEMPROF_DYNAMIC_LINK_FLAGS}
+                ${VERSION_SCRIPT_FLAG}
+      LINK_LIBS ${MEMPROF_DYNAMIC_LIBS}
+      DEFS ${MEMPROF_DYNAMIC_DEFINITIONS}
+      PARENT_TARGET memprof)
+
+    if (SANITIZER_USE_SYMBOLS)
+      add_sanitizer_rt_symbols(clang_rt.memprof_cxx
+        ARCHS ${arch})
+      add_dependencies(memprof clang_rt.memprof_cxx-${arch}-symbols)
+      add_sanitizer_rt_symbols(clang_rt.memprof
+        ARCHS ${arch}
+        EXTRA memprof.syms.extra)
+      add_dependencies(memprof clang_rt.memprof-${arch}-symbols)
+    endif()
+  endforeach()
+endif()
 
 
 if(COMPILER_RT_INCLUDE_TESTS)
diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp
index 60f5c853f9d76..9bdf4d4e89383 100644
--- a/compiler-rt/lib/memprof/memprof_allocator.cpp
+++ b/compiler-rt/lib/memprof/memprof_allocator.cpp
@@ -31,17 +31,28 @@
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
 
+#if SANITIZER_LINUX
 #include <sched.h>
+#elif SANITIZER_APPLE
+#include <dlfcn.h>
+#endif
 #include <time.h>
 
 #define MAX_HISTOGRAM_PRINT_SIZE 32U
 
 extern bool __memprof_histogram;
+extern bool __memprof_fine_granularity;
 
 namespace __memprof {
 namespace {
 using ::llvm::memprof::MemInfoBlock;
 
+// Returns true if the shadow uses fine (8-byte) granularity, which is the
+// case for both histogram mode and fine-granularity mode.
+static bool UseFineGranularity() {
+  return __memprof_histogram || __memprof_fine_granularity;
+}
+
 void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
   u64 p;
 
@@ -84,13 +95,26 @@ void Print(const MemInfoBlock &M, const u64 id, bool 
print_terse) {
 }
 } // namespace
 
+#if SANITIZER_APPLE
+using OsCpuNumberFn = int (*)();
+static OsCpuNumberFn os_cpu_number_fn = nullptr;
+#endif
+
 static int GetCpuId(void) {
   // _memprof_preinit is called via the preinit_array, which subsequently calls
   // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu
   // will seg fault as the address of __vdso_getcpu will be null.
   if (!memprof_inited)
     return -1;
+#if SANITIZER_LINUX
   return sched_getcpu();
+#elif SANITIZER_APPLE
+  if (os_cpu_number_fn)
+    return os_cpu_number_fn();
+  return -1;
+#else
+  return -1;
+#endif
 }
 
 // Compute the timestamp in ms.
@@ -248,7 +272,7 @@ void ClearShadow(uptr addr, uptr size) {
   CHECK(REAL(memset));
   uptr shadow_beg;
   uptr shadow_end;
-  if (__memprof_histogram) {
+  if (UseFineGranularity()) {
     shadow_beg = HISTOGRAM_MEM_TO_SHADOW(addr);
     shadow_end = HISTOGRAM_MEM_TO_SHADOW(addr + size);
   } else {
@@ -314,6 +338,13 @@ struct Allocator {
   static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
     if (__memprof_histogram) {
       return CreateNewMIBWithHistogram(p, m, user_size);
+    } else if (__memprof_fine_granularity) {
+      // Fine granularity uses histogram-style shadow layout for counting,
+      // but does not collect per-bucket histograms.
+      u64 c = GetShadowCountHistogram(p, user_size);
+      long curtime = GetTimestamp();
+      return MemInfoBlock(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
+                          GetCpuId(), 0, 0);
     } else {
       return CreateNewMIBWithoutHistogram(p, m, user_size);
     }
@@ -354,13 +385,38 @@ struct Allocator {
 
     allocator.ForceLock();
 
-    InsertLiveBlocks();
+    Vector<u64> MemBlockAddresses;
+    InsertLiveBlocks(MemBlockAddresses);
     if (flags()->print_text) {
       if (!flags()->print_terse)
         Printf("Recorded MIBs (incl. live on exit):\n");
       MIBMap.ForEach(PrintCallback,
                      reinterpret_cast<void *>(flags()->print_terse));
+      // On Apple platforms, StackDepotPrintAll() calls
+      // StackTrace::Print() which triggers symbolization via dladdr().
+      // During atexit, this can deadlock because dladdr() acquires dyld
+      // internal locks that may already be held. Instead, print raw
+      // unsymbolized addresses that can be post-processed offline with
+      // atos or llvm-symbolizer.
+#if SANITIZER_APPLE
+      Vector<u64> StackIds;
+      MIBMap.ForEach(
+          [](const uptr Key, LockedMemInfoBlock *const &, void *Arg) {
+            auto *StackIds = reinterpret_cast<Vector<u64> *>(Arg);
+            StackIds->PushBack(Key);
+          },
+          reinterpret_cast<void *>(&StackIds));
+      for (uptr i = 0; i < StackIds.Size(); i++) {
+        u32 Id = static_cast<u32>(StackIds[i]);
+        StackTrace St = StackDepotGet(Id);
+        Printf("Stack for id %u:\n", Id);
+        for (u32 j = 0; j < St.size; j++)
+          Printf("  #%u 0x%zx\n", j, St.trace[j]);
+        Printf("\n");
+      }
+#else
       StackDepotPrintAll();
+#endif
     } else {
       // Serialize the contents to a raw profile. Format documented in
       // memprof_rawprofile.h.
@@ -369,7 +425,8 @@ struct Allocator {
       __sanitizer::ListOfModules List;
       List.init();
       ArrayRef<LoadedModule> Modules(List.begin(), List.end());
-      u64 BytesSerialized = SerializeToRawProfile(MIBMap, Modules, Buffer);
+      u64 BytesSerialized =
+          SerializeToRawProfile(MIBMap, Modules, MemBlockAddresses, Buffer);
       CHECK(Buffer && BytesSerialized && "could not serialize to buffer");
       report_file.Write(Buffer, BytesSerialized);
     }
@@ -378,20 +435,27 @@ struct Allocator {
   }
 
   // Inserts any blocks which have been allocated but not yet deallocated.
-  void InsertLiveBlocks() {
+  // Also records their addresses in MemBlockAddresses.
+  void InsertLiveBlocks(Vector<u64> &MemBlockAddresses) {
+    struct InsertLiveBlocksCtx {
+      Allocator *A;
+      Vector<u64> *Addrs;
+    };
+    InsertLiveBlocksCtx Ctx{this, &MemBlockAddresses};
     allocator.ForEachChunk(
-        [](uptr chunk, void *alloc) {
+        [](uptr chunk, void *arg) {
+          auto *Ctx = (InsertLiveBlocksCtx *)arg;
           u64 user_requested_size;
-          Allocator *A = (Allocator *)alloc;
           MemprofChunk *m =
-              A->GetMemprofChunk((void *)chunk, user_requested_size);
+              Ctx->A->GetMemprofChunk((void *)chunk, user_requested_size);
           if (!m)
             return;
           uptr user_beg = ((uptr)m) + kChunkHeaderSize;
           MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size);
-          InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
+          InsertOrMerge(m->alloc_context_id, newMIB, Ctx->A->MIBMap);
+          Ctx->Addrs->PushBack(static_cast<u64>(user_beg));
         },
-        this);
+        &Ctx);
   }
 
   void InitLinkerInitialized() {
@@ -641,7 +705,12 @@ static Allocator instance(LINKER_INITIALIZED);
 
 static MemprofAllocator &get_allocator() { return instance.allocator; }
 
-void InitializeAllocator() { instance.InitLinkerInitialized(); }
+void InitializeAllocator() {
+  instance.InitLinkerInitialized();
+#if SANITIZER_APPLE
+  os_cpu_number_fn = (OsCpuNumberFn)dlsym(RTLD_DEFAULT, "_os_cpu_number");
+#endif
+}
 
 void MemprofThreadLocalMallocStorage::CommitBack() {
   instance.CommitBack(this);
@@ -766,6 +835,14 @@ uptr memprof_malloc_usable_size(const void *ptr) {
   return usable_size;
 }
 
+uptr memprof_mz_size(const void *ptr) {
+  return memprof_malloc_usable_size(ptr);
+}
+
+void memprof_mz_force_lock() { instance.ForceLock(); }
+
+void memprof_mz_force_unlock() { instance.ForceUnlock(); }
+
 } // namespace __memprof
 
 // ---------------------- Interface ---------------- {{{1
diff --git a/compiler-rt/lib/memprof/memprof_allocator.h b/compiler-rt/lib/memprof/memprof_allocator.h
index 6d898f06f7e42..99fdc4a325fbf 100644
--- a/compiler-rt/lib/memprof/memprof_allocator.h
+++ b/compiler-rt/lib/memprof/memprof_allocator.h
@@ -20,7 +20,7 @@
 #include "sanitizer_common/sanitizer_allocator.h"
 #include "sanitizer_common/sanitizer_list.h"
 
-#if !defined(__x86_64__)
+#if !defined(__x86_64__) && !defined(__aarch64__)
 #error Unsupported platform
 #endif
 #if !SANITIZER_CAN_USE_ALLOCATOR64
@@ -103,5 +103,10 @@ uptr memprof_malloc_usable_size(const void *ptr);
 
 void PrintInternalAllocatorStats();
 
+// Mac-specific malloc zone functions.
+uptr memprof_mz_size(const void *ptr);
+void memprof_mz_force_lock();
+void memprof_mz_force_unlock();
+
 } // namespace __memprof
 #endif // MEMPROF_ALLOCATOR_H
diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp
index f4d7fd46e6198..c142a66d5c7f7 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.cpp
+++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp
@@ -63,9 +63,14 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
 #define COMMON_INTERCEPTOR_ENTER(ctx, func, ...)                               
\
   MEMPROF_INTERCEPTOR_ENTER(ctx, func);                                        
\
   do {                                                                         
\
-    if (memprof_init_is_running)                                               
\
-      return REAL(func)(__VA_ARGS__);                                          
\
-    ENSURE_MEMPROF_INITED();                                                   
\
+    if constexpr (SANITIZER_APPLE) {                                           
\
+      if (UNLIKELY(!memprof_inited))                                           
\
+        return REAL(func)(__VA_ARGS__);                                        
\
+    } else {                                                                   
\
+      if (memprof_init_is_running)                                             
\
+        return REAL(func)(__VA_ARGS__);                                        
\
+      ENSURE_MEMPROF_INITED();                                                 
\
+    }                                                                          
\
   } while (false)
 #define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path)                              
\
   do {                                                                         
\
@@ -168,8 +173,13 @@ INTERCEPTOR(int, pthread_join, void *t, void **arg) {
 
 DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 
+#if SANITIZER_APPLE
+DECLARE_REAL(char *, index, const char *string, int c)
+OVERRIDE_FUNCTION(index, strchr);
+#else
 INTERCEPTOR(char *, index, const char *string, int c)
 ALIAS(WRAP(strchr));
+#endif
 
 // For both strcat() and strncat() we need to check the validity of |to|
 // argument irrespective of the |from| length.
@@ -201,8 +211,13 @@ INTERCEPTOR(char *, strncat, char *to, const char *from, 
usize size) {
 INTERCEPTOR(char *, strcpy, char *to, const char *from) {
   void *ctx;
   MEMPROF_INTERCEPTOR_ENTER(ctx, strcpy);
-  if (memprof_init_is_running) {
-    return REAL(strcpy)(to, from);
+  if constexpr (SANITIZER_APPLE) {
+    if (UNLIKELY(!memprof_inited))
+      return REAL(strcpy)(to, from);
+  } else {
+    if (memprof_init_is_running) {
+      return REAL(strcpy)(to, from);
+    }
   }
   ENSURE_MEMPROF_INITED();
   uptr from_size = internal_strlen(from) + 1;
@@ -225,6 +240,7 @@ INTERCEPTOR(char *, strdup, const char *s) {
   return reinterpret_cast<char *>(new_mem);
 }
 
+#if SANITIZER_LINUX
 INTERCEPTOR(char *, __strdup, const char *s) {
   void *ctx;
   MEMPROF_INTERCEPTOR_ENTER(ctx, strdup);
@@ -238,6 +254,7 @@ INTERCEPTOR(char *, __strdup, const char *s) {
   REAL(memcpy)(new_mem, s, length + 1);
   return reinterpret_cast<char *>(new_mem);
 }
+#endif // SANITIZER_LINUX
 
 INTERCEPTOR(char *, strncpy, char *to, const char *from, usize size) {
   void *ctx;
@@ -320,7 +337,9 @@ void InitializeMemprofInterceptors() {
   MEMPROF_INTERCEPT_FUNC(strncat);
   MEMPROF_INTERCEPT_FUNC(strncpy);
   MEMPROF_INTERCEPT_FUNC(strdup);
+#if SANITIZER_LINUX
   MEMPROF_INTERCEPT_FUNC(__strdup);
+#endif
   MEMPROF_INTERCEPT_FUNC(index);
 
   MEMPROF_INTERCEPT_FUNC(atoi);
diff --git a/compiler-rt/lib/memprof/memprof_interceptors.h b/compiler-rt/lib/memprof/memprof_interceptors.h
index 53d685706b849..5b21cae811bcf 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.h
+++ b/compiler-rt/lib/memprof/memprof_interceptors.h
@@ -40,10 +40,11 @@ DECLARE_REAL(char *, strncpy, char *to, const char *from, 
SIZE_T size)
 DECLARE_REAL(SIZE_T, strnlen, const char *s, SIZE_T maxlen)
 DECLARE_REAL(char *, strstr, const char *s1, const char *s2)
 
+#if !SANITIZER_APPLE
 #define MEMPROF_INTERCEPT_FUNC(name)                                           
\
   do {                                                                         
\
     if (!INTERCEPT_FUNCTION(name))                                             
\
-      VReport(1, "MemProfiler: failed to intercept '%s'\n'", #name);           
\
+      VReport(1, "MemProfiler: failed to intercept '%s'\n", #name);            
\
   } while (0)
 #define MEMPROF_INTERCEPT_FUNC_VER(name, ver)                                  
\
   do {                                                                         
\
@@ -56,6 +57,12 @@ DECLARE_REAL(char *, strstr, const char *s1, const char *s2)
       VReport(1, "MemProfiler: failed to intercept '%s@@%s' or '%s'\n", #name, 
\
               ver, #name);                                                     
\
   } while (0)
+#else
+// OS X interceptors don't need to be initialized with INTERCEPT_FUNCTION.
+#define MEMPROF_INTERCEPT_FUNC(name)
+#define MEMPROF_INTERCEPT_FUNC_VER(name, ver)
+#define MEMPROF_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver)
+#endif // !SANITIZER_APPLE
 
 #define MEMPROF_INTERCEPTOR_ENTER(ctx, func)                                   
\
   ctx = 0;                                                                     
\
diff --git a/compiler-rt/lib/memprof/memprof_interface_internal.h b/compiler-rt/lib/memprof/memprof_interface_internal.h
index 1fd07481a354d..648501c3ad4d1 100644
--- a/compiler-rt/lib/memprof/memprof_interface_internal.h
+++ b/compiler-rt/lib/memprof/memprof_interface_internal.h
@@ -61,6 +61,9 @@ SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE extern 
char
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE extern bool
     __memprof_histogram;
 
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE extern bool
+    __memprof_fine_granularity;
+
 SANITIZER_INTERFACE_ATTRIBUTE int __memprof_profile_dump();
 SANITIZER_INTERFACE_ATTRIBUTE void __memprof_profile_reset();
 
diff --git a/compiler-rt/lib/memprof/memprof_internal.h b/compiler-rt/lib/memprof/memprof_internal.h
index ec9fa10badecd..bf896d60baf3b 100644
--- a/compiler-rt/lib/memprof/memprof_internal.h
+++ b/compiler-rt/lib/memprof/memprof_internal.h
@@ -52,11 +52,12 @@ void PrintAddressSpaceLayout();
 // memprof_shadow_setup.cpp
 void InitializeShadowMemory();
 
-// memprof_malloc_linux.cpp
+// memprof_malloc_linux.cpp / memprof_malloc_mac.cpp
 void ReplaceSystemMalloc();
 
-// memprof_linux.cpp
+// memprof_linux.cpp / memprof_mac.cpp
 uptr FindDynamicShadowStart();
+void InitializePlatformExceptionHandlers();
 
 // memprof_thread.cpp
 MemprofThread *CreateMainThread();
diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp
index fbe5d250f840b..1ac82be202d30 100644
--- a/compiler-rt/lib/memprof/memprof_linux.cpp
+++ b/compiler-rt/lib/memprof/memprof_linux.cpp
@@ -12,9 +12,7 @@
 
//===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_platform.h"
-#if !SANITIZER_LINUX
-#error Unsupported OS
-#endif
+#if SANITIZER_LINUX
 
 #include "memprof_interceptors.h"
 #include "memprof_internal.h"
@@ -65,3 +63,5 @@ uptr FindDynamicShadowStart() {
 void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); }
 
 } // namespace __memprof
+
+#endif // SANITIZER_LINUX
diff --git a/compiler-rt/lib/memprof/memprof_mac.cpp b/compiler-rt/lib/memprof/memprof_mac.cpp
new file mode 100644
index 0000000000000..ece8d966db8ac
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_mac.cpp
@@ -0,0 +1,54 @@
+//===-- memprof_mac.cpp 
---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Mac-specific details.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_APPLE
+
+#include "memprof_interceptors.h"
+#include "memprof_internal.h"
+#include "memprof_mapping.h"
+#include "memprof_stack.h"
+#include "memprof_thread.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_mac.h"
+
+#include <dlfcn.h>
+#include <fcntl.h>
+#include <libkern/OSAtomic.h>
+#include <mach-o/dyld.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/sysctl.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+
+namespace __memprof {
+
+void InitializePlatformInterceptors() {}
+void InitializePlatformExceptionHandlers() {}
+
+uptr FindDynamicShadowStart() {
+  const uptr shadow_bytes = MemToShadowSize(kHighMemEnd);
+  return MapDynamicShadow(shadow_bytes, SHADOW_SCALE,
+                          /*min_shadow_base_alignment=*/0, kHighMemEnd,
+                          GetMmapGranularity());
+}
+
+void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); }
+
+} // namespace __memprof
+
+#endif // SANITIZER_APPLE
diff --git a/compiler-rt/lib/memprof/memprof_malloc_linux.cpp 
b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp
index 68fe65475889a..d04d9ab26a158 100644
--- a/compiler-rt/lib/memprof/memprof_malloc_linux.cpp
+++ b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp
@@ -14,9 +14,7 @@
 
//===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_platform.h"
-#if !SANITIZER_LINUX
-#error Unsupported OS
-#endif
+#if SANITIZER_LINUX
 
 #include "memprof_allocator.h"
 #include "memprof_interceptors.h"
@@ -163,3 +161,5 @@ INTERCEPTOR(void, malloc_stats, void) { 
__memprof_print_accumulated_stats(); }
 namespace __memprof {
 void ReplaceSystemMalloc() {}
 } // namespace __memprof
+
+#endif // SANITIZER_LINUX
diff --git a/compiler-rt/lib/memprof/memprof_malloc_mac.cpp 
b/compiler-rt/lib/memprof/memprof_malloc_mac.cpp
new file mode 100644
index 0000000000000..ce0fc40c2a156
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_malloc_mac.cpp
@@ -0,0 +1,77 @@
+//===-- memprof_malloc_mac.cpp 
--------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+// Mac-specific malloc interception.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_APPLE
+
+#include "memprof_allocator.h"
+#include "memprof_interceptors.h"
+#include "memprof_internal.h"
+#include "memprof_stack.h"
+
+using namespace __memprof;
+#define COMMON_MALLOC_ZONE_NAME "memprof"
+#define COMMON_MALLOC_ENTER()                                                  
\
+  do {                                                                         
\
+    MemprofInitFromRtl();                                                      
\
+  } while (false)
+#define COMMON_MALLOC_SANITIZER_INITIALIZED memprof_inited
+#define COMMON_MALLOC_FORCE_LOCK() memprof_mz_force_lock()
+#define COMMON_MALLOC_FORCE_UNLOCK() memprof_mz_force_unlock()
+#define COMMON_MALLOC_MEMALIGN(alignment, size)                                
\
+  GET_STACK_TRACE_MALLOC;                                                      
\
+  void *p = memprof_memalign(alignment, size, &stack, FROM_MALLOC)
+#define COMMON_MALLOC_MALLOC(size)                                             
\
+  GET_STACK_TRACE_MALLOC;                                                      
\
+  void *p = memprof_malloc(size, &stack)
+#define COMMON_MALLOC_REALLOC(ptr, size)                                       
\
+  GET_STACK_TRACE_MALLOC;                                                      
\
+  void *p = memprof_realloc(ptr, size, &stack);
+#define COMMON_MALLOC_CALLOC(count, size)                                      
\
+  GET_STACK_TRACE_MALLOC;                                                      
\
+  void *p = memprof_calloc(count, size, &stack);
+#define COMMON_MALLOC_POSIX_MEMALIGN(memptr, alignment, size)                  
\
+  GET_STACK_TRACE_MALLOC;                                                      
\
+  int res = memprof_posix_memalign(memptr, alignment, size, &stack);
+#define COMMON_MALLOC_VALLOC(size)                                             
\
+  GET_STACK_TRACE_MALLOC;                                                      
\
+  void *p = memprof_memalign(GetPageSizeCached(), size, &stack, FROM_MALLOC);
+#define COMMON_MALLOC_FREE(ptr)                                                
\
+  GET_STACK_TRACE_FREE;                                                        
\
+  memprof_free(ptr, &stack, FROM_MALLOC);
+#define COMMON_MALLOC_SIZE(ptr) uptr size = memprof_mz_size(ptr);
+#define COMMON_MALLOC_FILL_STATS(zone, stats)
+#define COMMON_MALLOC_REPORT_UNKNOWN_REALLOC(ptr, zone_ptr, zone_name)
+#define COMMON_MALLOC_NAMESPACE __memprof
+#define COMMON_MALLOC_HAS_ZONE_ENUMERATOR 0
+#define COMMON_MALLOC_HAS_EXTRA_INTROSPECTION_INIT 0
+
+#include "sanitizer_common/sanitizer_malloc_mac.inc"
+
+namespace COMMON_MALLOC_NAMESPACE {
+
+bool HandleDlopenInit() {
+  static_assert(SANITIZER_SUPPORTS_INIT_FOR_DLOPEN,
+                "Expected SANITIZER_SUPPORTS_INIT_FOR_DLOPEN to be true");
+  // Proceed only when the variable is set and its value starts with '1'.
+  const auto opt_in = GetEnv("APPLE_MEMPROF_INIT_FOR_DLOPEN");
+  if (!opt_in || internal_strncmp(opt_in, "1", 1) != 0)
+    return false;
+  // Opted in: populate the malloc zone fields before returning true.
+  InitMallocZoneFields();
+  return true;
+}
+
+} // namespace COMMON_MALLOC_NAMESPACE
+
+#endif // SANITIZER_APPLE
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp 
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index f579e12b15d0a..0af57e1c5ea96 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -189,6 +189,30 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const 
Vector<u64> &StackIds,
   CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
         "Expected num bytes != actual bytes written");
 }
+
+// The memory address section uses the following format:
+// ---------- Mem Address Info
+// Num Entries (u64)
+// ---------- Address Entry
+// Address (u64)
+// ----------
+// ...
+u64 MemAddressSizeBytes(const Vector<u64> &Addresses) {
+  return sizeof(u64) * (1 + Addresses.Size()); // Count field + entries.
+}
+
+void SerializeMemAddressesToBuffer(const Vector<u64> &Addresses,
+                                   const u64 ExpectedNumBytes, char *&Buffer) {
+  // Writes the entry count, then each address; Buffer itself is not advanced.
+  const u64 Count = Addresses.Size();
+  char *Cursor = WriteBytes(Count, Buffer);
+  for (u64 Idx = 0; Idx < Count; ++Idx)
+    Cursor = WriteBytes(Addresses[Idx], Cursor);
+  // ExpectedNumBytes may include padding, hence >= rather than ==.
+  CHECK(ExpectedNumBytes >= static_cast<u64>(Cursor - Buffer) &&
+        "Expected num bytes != actual bytes written");
+}
+
 } // namespace
 
 // Format
@@ -199,6 +223,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const 
Vector<u64> &StackIds,
 // Segment Offset
 // MIB Info Offset
 // Stack Offset
+// Mem Address Offset
 // ---------- Segment Info
 // Num Entries
 // ---------- Segment Entry
@@ -234,9 +259,16 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const 
Vector<u64> &StackIds,
 // ...
 // ----------
 // Optional Padding Bytes
+// ---------- Mem Address Info (V6+)
+// Num Entries
+// ---------- Address Entry
+// Address (u64)
+// ----------
+// ...
+// Optional Padding Bytes
 // ...
 u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
-                          char *&Buffer) {
+                          Vector<u64> &MemBlockAddresses, char *&Buffer) {
   // Each section size is rounded up to 8b since the first entry in each 
section
   // is a u64 which holds the number of entries in the section by convention.
   const u64 NumSegmentBytes = RoundUpTo(SegmentSizeBytes(Modules), 8);
@@ -262,25 +294,32 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, 
ArrayRef<LoadedModule> Modules,
 
   const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8);
 
+  const u64 NumMemAddressBytes =
+      RoundUpTo(MemAddressSizeBytes(MemBlockAddresses), 8);
+
   // Ensure that the profile is 8b aligned. We allow for some optional padding
   // at the end so that any subsequent profile serialized to the same file does
   // not incur unaligned accesses.
   const u64 TotalSizeBytes =
       RoundUpTo(sizeof(Header) + NumSegmentBytes + NumStackBytes +
-                    NumMIBInfoBytes + NumHistogramBytes,
+                    NumMIBInfoBytes + NumHistogramBytes + NumMemAddressBytes,
                 8);
 
   // Allocate the memory for the entire buffer incl. info blocks.
   Buffer = (char *)InternalAlloc(TotalSizeBytes);
   char *Ptr = Buffer;
 
+  const u64 StackOffset =
+      sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes + NumHistogramBytes;
+  const u64 MemAddressOffset = StackOffset + NumStackBytes;
+
   Header header{MEMPROF_RAW_MAGIC_64,
                 MEMPROF_RAW_VERSION,
                 static_cast<u64>(TotalSizeBytes),
                 sizeof(Header),
                 sizeof(Header) + NumSegmentBytes,
-                sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes +
-                    NumHistogramBytes};
+                StackOffset,
+                MemAddressOffset};
   Ptr = WriteBytes(header, Ptr);
 
   SerializeSegmentsToBuffer(Modules, NumSegmentBytes, Ptr);
@@ -291,6 +330,9 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, 
ArrayRef<LoadedModule> Modules,
   Ptr += NumMIBInfoBytes + NumHistogramBytes;
 
   SerializeStackToBuffer(StackIds, NumStackBytes, Ptr);
+  Ptr += NumStackBytes;
+
+  SerializeMemAddressesToBuffer(MemBlockAddresses, NumMemAddressBytes, Ptr);
 
   return TotalSizeBytes;
 }
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.h 
b/compiler-rt/lib/memprof/memprof_rawprofile.h
index e2494175f165e..7f66498a2debb 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.h
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.h
@@ -4,11 +4,15 @@
 #include "memprof_mibmap.h"
 #include "sanitizer_common/sanitizer_array_ref.h"
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_vector.h"
 
 namespace __memprof {
 // Serialize the in-memory representation of the memprof profile to the raw
 // binary format. The format itself is documented memprof_rawprofile.cpp.
+// MemBlockAddresses contains the addresses of memory blocks observed during
+// profiling.
 u64 SerializeToRawProfile(MIBMapTy &BlockCache, ArrayRef<LoadedModule> Modules,
+                          __sanitizer::Vector<u64> &MemBlockAddresses,
                           char *&Buffer);
 } // namespace __memprof
 
diff --git a/compiler-rt/lib/memprof/memprof_rtl.cpp 
b/compiler-rt/lib/memprof/memprof_rtl.cpp
index 4fd4b5210a7ec..ed1d497661be9 100644
--- a/compiler-rt/lib/memprof/memprof_rtl.cpp
+++ b/compiler-rt/lib/memprof/memprof_rtl.cpp
@@ -37,6 +37,9 @@ SANITIZER_WEAK_ATTRIBUTE char __memprof_profile_filename[1];
 // Share ClHistogram compiler flag with runtime.
 SANITIZER_WEAK_ATTRIBUTE bool __memprof_histogram;
 
+// Share ClFineGranularity compiler flag with runtime.
+SANITIZER_WEAK_ATTRIBUTE bool __memprof_fine_granularity;
+
 namespace __memprof {
 
 static void MemprofDie() {
diff --git a/compiler-rt/lib/memprof/tests/CMakeLists.txt 
b/compiler-rt/lib/memprof/tests/CMakeLists.txt
index 1603d47d019ed..080dfc0c3499e 100644
--- a/compiler-rt/lib/memprof/tests/CMakeLists.txt
+++ b/compiler-rt/lib/memprof/tests/CMakeLists.txt
@@ -42,6 +42,14 @@ if(NOT WIN32)
   list(APPEND MEMPROF_UNITTEST_LINK_FLAGS -pthread)
 endif()
 
+if(APPLE)
+  list(APPEND MEMPROF_UNITTEST_CFLAGS ${DARWIN_osx_CFLAGS})
+  list(APPEND MEMPROF_UNITTEST_LINK_FLAGS ${DARWIN_osx_LINK_FLAGS})
+
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+  list(APPEND MEMPROF_UNITTEST_LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS})
+endif()
+
 set(MEMPROF_UNITTEST_DEPS)
 if (TARGET cxx-headers OR HAVE_LIBCXX)
   list(APPEND MEMPROF_UNITTEST_DEPS cxx-headers)
@@ -54,13 +62,22 @@ append_list_if(COMPILER_RT_HAS_LIBDL -ldl 
MEMPROF_UNITTEST_LINK_LIBRARIES)
 
 # Adds memprof tests for each architecture.
 macro(add_memprof_tests_for_arch arch)
-  set(MEMPROF_TEST_RUNTIME_OBJECTS
-    $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
-    $<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}>
-    $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
-    $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
-    $<TARGET_OBJECTS:RTSanitizerCommonSymbolizerInternal.${arch}>
-  )
+  if(APPLE)
+    set(MEMPROF_TEST_RUNTIME_OBJECTS
+      $<TARGET_OBJECTS:RTSanitizerCommon.osx>
+      $<TARGET_OBJECTS:RTSanitizerCommonCoverage.osx>
+      $<TARGET_OBJECTS:RTSanitizerCommonLibc.osx>
+      $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.osx>
+    )
+  else()
+    set(MEMPROF_TEST_RUNTIME_OBJECTS
+      $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+      $<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}>
+      $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
+      $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
+      $<TARGET_OBJECTS:RTSanitizerCommonSymbolizerInternal.${arch}>
+    )
+  endif()
   set(MEMPROF_TEST_RUNTIME RTMemProfTest.${arch})
   add_library(${MEMPROF_TEST_RUNTIME} STATIC ${MEMPROF_TEST_RUNTIME_OBJECTS})
   set_target_properties(${MEMPROF_TEST_RUNTIME} PROPERTIES
diff --git a/compiler-rt/lib/memprof/tests/rawprofile.cpp 
b/compiler-rt/lib/memprof/tests/rawprofile.cpp
index 5764af9ce8afb..01fdcd9fb7230 100644
--- a/compiler-rt/lib/memprof/tests/rawprofile.cpp
+++ b/compiler-rt/lib/memprof/tests/rawprofile.cpp
@@ -9,6 +9,7 @@
 #include "sanitizer_common/sanitizer_procmaps.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_vector.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -47,8 +48,9 @@ TEST(MemProf, Basic) {
   __sanitizer::LoadedModule FakeModule;
   FakeModule.addAddressRange(/*begin=*/0x10, /*end=*/0x20, /*executable=*/true,
                              /*writable=*/false, /*name=*/"");
-  const char uuid[MEMPROF_BUILDID_MAX_SIZE] = {0xC, 0x0, 0xF, 0xF, 0xE, 0xE};
-  FakeModule.setUuid(uuid, MEMPROF_BUILDID_MAX_SIZE);
+  const char uuid[__sanitizer::kModuleUUIDSize] = {0xC, 0x0, 0xF,
+                                                   0xF, 0xE, 0xE};
+  FakeModule.setUuid(uuid, __sanitizer::kModuleUUIDSize);
   __sanitizer::ArrayRef<__sanitizer::LoadedModule> Modules(&FakeModule,
                                                            (&FakeModule) + 1);
 
@@ -60,12 +62,18 @@ TEST(MemProf, Basic) {
   FakeMIB.AllocCount = 0x1;
   FakeMIB.TotalAccessCount = 0x2;
 
+  // Use large enough PC values to avoid underflow when
+  // GetPreviousInstructionPc subtracts an architecture-dependent value.
   uint64_t FakeIds[2];
-  FakeIds[0] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/2, FakeMap);
-  FakeIds[1] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/3, FakeMap);
+  FakeIds[0] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/100, FakeMap);
+  FakeIds[1] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/200, FakeMap);
 
   char *Ptr = nullptr;
-  uint64_t NumBytes = SerializeToRawProfile(FakeMap, Modules, Ptr);
+  __sanitizer::Vector<uint64_t> FakeAddresses;
+  FakeAddresses.PushBack(0x100);
+  FakeAddresses.PushBack(0x200);
+  uint64_t NumBytes =
+      SerializeToRawProfile(FakeMap, Modules, FakeAddresses, Ptr);
   const char *Buffer = Ptr;
 
   ASSERT_GT(NumBytes, 0ULL);
@@ -78,39 +86,54 @@ TEST(MemProf, Basic) {
   const uint64_t SegmentOffset = Read(Ptr);
   const uint64_t MIBOffset = Read(Ptr);
   const uint64_t StackOffset = Read(Ptr);
+  const uint64_t MemAddressOffset = Read(Ptr);
 
   // ============= Check sizes and padding.
   EXPECT_EQ(TotalSize, NumBytes);
   EXPECT_EQ(TotalSize % 8, 0ULL);
 
-  // Should be equal to the size of the raw profile header.
-  EXPECT_EQ(SegmentOffset, 48ULL);
+  // Should be equal to the size of the raw profile header (7 fields * 8 
bytes).
+  EXPECT_EQ(SegmentOffset, 56ULL);
 
   // We expect only 1 segment entry, 8b for the count and 64b for SegmentEntry
   // in memprof_rawprofile.cpp.
   EXPECT_EQ(MIBOffset - SegmentOffset, 72ULL);
 
-  EXPECT_EQ(MIBOffset, 120ULL);
+  EXPECT_EQ(MIBOffset, 128ULL);
   // We expect 2 mib entry, 8b for the count and sizeof(uint64_t) +
   // sizeof(MemInfoBlock) contains stack id + MeminfoBlock.
   EXPECT_EQ(StackOffset - MIBOffset, 8 + 2 * (8 + sizeof(MemInfoBlock)));
 
-  EXPECT_EQ(StackOffset, 432ULL);
+  EXPECT_EQ(StackOffset, 440ULL);
   // We expect 2 stack entries, with 5 frames - 8b for total count,
   // 2 * (8b for id, 8b for frame count and 5*8b for fake frames).
-  // Since this is the last section, there may be additional padding at the end
-  // to make the total profile size 8b aligned.
-  EXPECT_GE(TotalSize - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8));
+  // No longer the last section; 120b is already 8b-aligned, so size is exact.
+  EXPECT_EQ(MemAddressOffset - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8));
+
+  // We expect 2 address entries: 8b for count + 2 * 8b for addresses.
+  EXPECT_GE(TotalSize - MemAddressOffset, 8ULL + 2 * 8);
 
   // ============= Check contents.
-  unsigned char ExpectedSegmentBytes[72] = {
-      0x01, 0,   0,   0,   0,   0,  0, 0, // Number of entries
-      0x10, 0,   0,   0,   0,   0,  0, 0, // Start
-      0x20, 0,   0,   0,   0,   0,  0, 0, // End
-      0x0,  0,   0,   0,   0,   0,  0, 0, // Offset
-      0x20, 0,   0,   0,   0,   0,  0, 0, // UuidSize
-      0xC,  0x0, 0xF, 0xF, 0xE, 0xE       // Uuid
-  };
+  // Build expected segment bytes dynamically since uuid size varies by
+  // platform (kModuleUUIDSize is 16 on Apple, 32 on Linux).
+  unsigned char ExpectedSegmentBytes[72] = {};
+  // Number of entries = 1
+  ExpectedSegmentBytes[0] = 0x01;
+  // Start = 0x10
+  ExpectedSegmentBytes[8] = 0x10;
+  // End = 0x20
+  ExpectedSegmentBytes[16] = 0x20;
+  // Offset = 0x0 (base_address_ is 0 for default-constructed LoadedModule)
+  // BuildIdSize = kModuleUUIDSize
+  ExpectedSegmentBytes[32] =
+      static_cast<unsigned char>(__sanitizer::kModuleUUIDSize);
+  // Uuid
+  ExpectedSegmentBytes[40] = 0xC;
+  ExpectedSegmentBytes[41] = 0x0;
+  ExpectedSegmentBytes[42] = 0xF;
+  ExpectedSegmentBytes[43] = 0xF;
+  ExpectedSegmentBytes[44] = 0xE;
+  ExpectedSegmentBytes[45] = 0xE;
   EXPECT_EQ(memcmp(Buffer + SegmentOffset, ExpectedSegmentBytes, 72), 0);
 
   // Check that the number of entries is 2.
@@ -139,21 +162,19 @@ TEST(MemProf, Basic) {
   // Check that the 1st stack id is set.
   EXPECT_EQ(*reinterpret_cast<const uint64_t *>(Buffer + StackOffset + 8),
             FakeIds[0]);
-  // Contents are num pcs, value of each pc - 1.
-  unsigned char ExpectedStackBytes[2][6 * 8] = {
-      {
-          0x5, 0, 0, 0, 0, 0, 0, 0, // Number of PCs
-          0x1, 0, 0, 0, 0, 0, 0, 0, // PC ...
-          0x2, 0, 0, 0, 0, 0, 0, 0, 0x3, 0, 0, 0, 0, 0, 0, 0,
-          0x4, 0, 0, 0, 0, 0, 0, 0, 0x5, 0, 0, 0, 0, 0, 0, 0,
-      },
-      {
-          0x5, 0, 0, 0, 0, 0, 0, 0, // Number of PCs
-          0x2, 0, 0, 0, 0, 0, 0, 0, // PC ...
-          0x3, 0, 0, 0, 0, 0, 0, 0, 0x4, 0, 0, 0, 0, 0, 0, 0,
-          0x5, 0, 0, 0, 0, 0, 0, 0, 0x6, 0, 0, 0, 0, 0, 0, 0,
-      },
-  };
+  // Build expected stack bytes dynamically since GetPreviousInstructionPc
+  // applies an architecture-dependent adjustment (e.g., -1 on x86_64, -4 on
+  // arm64).
+  unsigned char ExpectedStackBytes[2][6 * 8] = {};
+  for (int s = 0; s < 2; s++) {
+    uintptr_t StackPCBegin = (s == 0) ? 100 : 200;
+    // Number of PCs = 5
+    ExpectedStackBytes[s][0] = 0x5;
+    for (int i = 0; i < 5; i++) {
+      uint64_t pc = StackTrace::GetPreviousInstructionPc(StackPCBegin + i);
+      memcpy(&ExpectedStackBytes[s][(i + 1) * 8], &pc, sizeof(pc));
+    }
+  }
   EXPECT_EQ(memcmp(Buffer + StackOffset + 16, ExpectedStackBytes[0],
                    sizeof(ExpectedStackBytes[0])),
             0);
diff --git a/compiler-rt/lib/memprof/weak_symbols.txt 
b/compiler-rt/lib/memprof/weak_symbols.txt
index bfece89e2e157..d43870f8965f2 100644
--- a/compiler-rt/lib/memprof/weak_symbols.txt
+++ b/compiler-rt/lib/memprof/weak_symbols.txt
@@ -1 +1,3 @@
-___memprof_default_options_str ___memprof_default_options 
__memprof_profile_filename
+___memprof_default_options_str
+___memprof_default_options
+__memprof_profile_filename
diff --git a/compiler-rt/test/memprof/CMakeLists.txt 
b/compiler-rt/test/memprof/CMakeLists.txt
index 4c50ae6b83719..862565de24641 100644
--- a/compiler-rt/test/memprof/CMakeLists.txt
+++ b/compiler-rt/test/memprof/CMakeLists.txt
@@ -4,7 +4,7 @@ set(MEMPROF_TESTSUITES)
 set(MEMPROF_DYNAMIC_TESTSUITES)
 
 macro(get_bits_for_arch arch bits)
-  if (${arch} MATCHES "x86_64")
+  if (${arch} MATCHES "x86_64" OR ${arch} MATCHES "arm64" OR ${arch} MATCHES 
"aarch64")
     set(${bits} 64)
   else()
     message(FATAL_ERROR "Unexpected target architecture: ${arch}")
diff --git a/compiler-rt/test/memprof/TestCases/Darwin/lit.local.cfg.py 
b/compiler-rt/test/memprof/TestCases/Darwin/lit.local.cfg.py
new file mode 100644
index 0000000000000..af82d30cf4de9
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/Darwin/lit.local.cfg.py
@@ -0,0 +1,10 @@
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+
+root = getRoot(config)
+
+if root.target_os not in ["Darwin"]:
+    config.unsupported = True
diff --git a/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone.c 
b/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone.c
new file mode 100644
index 0000000000000..d057ab99ade6c
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone.c
@@ -0,0 +1,25 @@
+// Test basic memory profiling on Darwin via DYLD interposition of malloc/free.
+// Verifies that memprof correctly intercepts allocations through the malloc
+// zone mechanism.
+
+// RUN: %clang_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=print_text=true:log_path=stderr %run %t 2>&1 | 
FileCheck %s
+
+// CHECK:  Memory allocation stack id = 
[[STACKID:[0-9]+]]{{[[:space:]].*}}alloc_count 1, size (ave/min/max) 40.00 / 40 
/ 40
+// CHECK-NEXT:  access_count (ave/min/max): 20.00 / 20 / 20
+// CHECK: Stack for id [[STACKID]]:
+// CHECK-NEXT: #0 0x{{[0-9a-f]+}}
+// CHECK-NEXT: #1 0x{{[0-9a-f]+}}
+
+#include <stdlib.h>
+
+int main() {
+  int *p = (int *)malloc(10 * sizeof(int));
+  for (int i = 0; i < 10; i++)
+    p[i] = i;
+  int j = 0;
+  for (int i = 0; i < 10; i++)
+    j += p[i];
+  free(p);
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone_allocators.c 
b/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone_allocators.c
new file mode 100644
index 0000000000000..57013b714288b
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone_allocators.c
@@ -0,0 +1,36 @@
+// Test that calloc, realloc, and posix_memalign are properly intercepted
+// through the Darwin malloc zone mechanism.
+
+// RUN: %clang_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=print_text=true:log_path=stderr %run %t 2>&1 | 
FileCheck %s
+
+// CHECK: Memory allocation stack id
+// CHECK: alloc_count
+
+#include <stdlib.h>
+#include <string.h>
+
+int main() {
+  // Test calloc interception.
+  int *p = (int *)calloc(10, sizeof(int));
+  for (int i = 0; i < 10; i++)
+    p[i] = i;
+  free(p);
+
+  // Test realloc interception.
+  char *q = (char *)malloc(10);
+  memset(q, 'a', 10);
+  q = (char *)realloc(q, 20);
+  memset(q, 'b', 20);
+  free(q);
+
+  // Test posix_memalign interception.
+  void *r;
+  int ret = posix_memalign(&r, 64, 128);
+  if (ret == 0) {
+    memset(r, 0, 128);
+    free(r);
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/Darwin/new_delete.cpp 
b/compiler-rt/test/memprof/TestCases/Darwin/new_delete.cpp
new file mode 100644
index 0000000000000..a2d5001386432
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/Darwin/new_delete.cpp
@@ -0,0 +1,24 @@
+// Test that memprof works with operator new/delete on Darwin.
+// This verifies C++ allocation interception through DYLD interposition.
+
+// RUN: %clangxx_memprof -O0 %s -o %t
+// RUN: %env_memprof_opts=print_text=true:log_path=stderr %run %t 2>&1 | 
FileCheck %s
+
+// CHECK:  Memory allocation stack id = 
[[STACKID:[0-9]+]]{{[[:space:]].*}}alloc_count 1, size (ave/min/max) 40.00 / 40 
/ 40
+// CHECK-NEXT:  access_count (ave/min/max): 20.00 / 20 / 20
+// CHECK: Stack for id [[STACKID]]:
+// CHECK-NEXT: #0 0x{{[0-9a-f]+}}
+// CHECK-NEXT: #1 0x{{[0-9a-f]+}}
+
+#include <cstdlib>
+
+int main() {
+  int *p = new int[10];
+  for (int i = 0; i < 10; i++)
+    p[i] = i;
+  int j = 0;
+  for (int i = 0; i < 10; i++)
+    j += p[i];
+  delete[] p;
+  return 0;
+}
diff --git a/compiler-rt/test/memprof/TestCases/Darwin/raw_profile.cpp 
b/compiler-rt/test/memprof/TestCases/Darwin/raw_profile.cpp
new file mode 100644
index 0000000000000..4ce8e3b8787de
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/Darwin/raw_profile.cpp
@@ -0,0 +1,19 @@
+// Test that the raw binary profile is correctly generated on Darwin.
+// Verify the magic number header to ensure profile serialization works.
+
+// RUN: %clangxx_memprof %s -o %t
+// RUN: %env_memprof_opts=log_path=stdout %run %t > %t.memprofraw
+// RUN: od -c -N 8 %t.memprofraw | FileCheck %s
+
+#include <cstdlib>
+#include <cstring>
+
+int main() {
+  char *x = (char *)malloc(10);
+  memset(x, 0, 10);
+  free(x);
+  return 0;
+}
+
+// Check the raw profile magic number (little-endian).
+// CHECK: 0000000 201   r   f   o   r   p   m 377
diff --git a/compiler-rt/test/memprof/TestCases/free_sized.cpp 
b/compiler-rt/test/memprof/TestCases/free_sized.cpp
index c8ce8464ef895..66c14735b051e 100644
--- a/compiler-rt/test/memprof/TestCases/free_sized.cpp
+++ b/compiler-rt/test/memprof/TestCases/free_sized.cpp
@@ -1,3 +1,4 @@
+// UNSUPPORTED: darwin
 // RUN: %clangxx_memprof %s -o %t
 
 // RUN: %env_memprof_opts=print_text=true:log_path=stdout %run %t | FileCheck 
%s
diff --git a/compiler-rt/test/memprof/TestCases/stress_dtls.c 
b/compiler-rt/test/memprof/TestCases/stress_dtls.c
index 8a0d671fb241a..f1a59eb868d12 100644
--- a/compiler-rt/test/memprof/TestCases/stress_dtls.c
+++ b/compiler-rt/test/memprof/TestCases/stress_dtls.c
@@ -1,4 +1,5 @@
 // REQUIRES: memprof-64-bits
+// UNSUPPORTED: darwin
 // Stress test dynamic TLS + dlopen + threads.
 //
 // RUN: %clang_memprof -x c -DSO_NAME=f0 %s -shared -o %t-f0.so -fPIC
diff --git a/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in 
b/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in
index 1e2442a1487a4..66a7d71ba1684 100644
--- a/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in
+++ b/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in
@@ -11,7 +11,6 @@ lit_config.load_config(config, 
"@COMPILER_RT_BINARY_DIR@/unittests/lit.common.un
 # Setup config name.
 config.name = 'MemProfiler-Unit'
 config.target_arch = "@arch@"
-assert config.target_arch == 'x86_64'
 
 config.test_exec_root = os.path.join("@COMPILER_RT_BINARY_DIR@",
                                      "lib", "memprof", "tests")
@@ -21,9 +20,7 @@ config.test_source_root = config.test_exec_root
 # When LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=on, the initial value of
 # config.compiler_rt_libdir (COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR) has the
 # host triple as the trailing path component. The value is incorrect for i386
-# tests on x86_64 hosts and vice versa. But, since only x86_64 is enabled as
-# target, and we don't support different environments for building and,
-# respectively, running tests, we we only need to fix up the x86_64 case.
+# tests on x86_64 hosts and vice versa.
 if config.enable_per_target_runtime_dir and config.target_arch != 
config.host_arch:
     config.compiler_rt_libdir = re.sub(r'/i386(?=-[^/]+$)', '/x86_64', 
config.compiler_rt_libdir)
 
diff --git a/compiler-rt/test/memprof/lit.cfg.py 
b/compiler-rt/test/memprof/lit.cfg.py
index e28507be4dc9e..4f4f0a39f279a 100644
--- a/compiler-rt/test/memprof/lit.cfg.py
+++ b/compiler-rt/test/memprof/lit.cfg.py
@@ -35,7 +35,9 @@ def get_required_attr(config, attr_name):
 # Setup source root.
 config.test_source_root = os.path.dirname(__file__)
 
-libdl_flag = "-ldl"
+libdl_flag = ""
+if config.target_os == "Linux":
+    libdl_flag = "-ldl"
 
 # Setup default compiler flags used with -fmemory-profile option.
 # FIXME: Review the set of required flags and check if it can be reduced.
@@ -74,10 +76,16 @@ def build_invocation(compile_flags):
     ("%clangxx_memprof ", build_invocation(clang_memprof_cxxflags))
 )
 if config.memprof_dynamic:
-    shared_libmemprof_path = os.path.join(
-        config.compiler_rt_libdir,
-        "libclang_rt.memprof{}.so".format(config.target_suffix),
-    )
+    if config.target_os == "Darwin":
+        shared_libmemprof_path = os.path.join(
+            config.compiler_rt_libdir,
+            "libclang_rt.memprof_osx_dynamic.dylib",
+        )
+    else:
+        shared_libmemprof_path = os.path.join(
+            config.compiler_rt_libdir,
+            "libclang_rt.memprof{}.so".format(config.target_suffix),
+        )
     config.substitutions.append(("%shared_libmemprof", shared_libmemprof_path))
     config.substitutions.append(
         ("%clang_memprof_static ", 
build_invocation(clang_memprof_static_cflags))
@@ -92,11 +100,17 @@ def build_invocation(compile_flags):
 
 config.available_features.add("fast-unwinder-works")
 
-# Set LD_LIBRARY_PATH to pick dynamic runtime up properly.
-new_ld_library_path = os.path.pathsep.join(
-    (config.compiler_rt_libdir, config.environment.get("LD_LIBRARY_PATH", ""))
-)
-config.environment["LD_LIBRARY_PATH"] = new_ld_library_path
+# Set library path to pick dynamic runtime up properly.
+if config.target_os == "Darwin":
+    new_dyld_library_path = os.path.pathsep.join(
+        (config.compiler_rt_libdir, 
config.environment.get("DYLD_LIBRARY_PATH", ""))
+    )
+    config.environment["DYLD_LIBRARY_PATH"] = new_dyld_library_path
+else:
+    new_ld_library_path = os.path.pathsep.join(
+        (config.compiler_rt_libdir, config.environment.get("LD_LIBRARY_PATH", 
""))
+    )
+    config.environment["LD_LIBRARY_PATH"] = new_ld_library_path
 
 # Default test suffixes.
 config.suffixes = [".c", ".cpp"]
@@ -106,7 +120,7 @@ def build_invocation(compile_flags):
 config.substitutions.append(("%pie", "-pie"))
 
 # Only run the tests on supported OSs.
-if config.target_os not in ["Linux"]:
+if config.target_os not in ["Linux", "Darwin"]:
     config.unsupported = True
 
 if not config.parallelism_group:
diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc 
b/llvm/include/llvm/ProfileData/MemProfData.inc
index 26badddae6f3a..5be4c0cc2802a 100644
--- a/llvm/include/llvm/ProfileData/MemProfData.inc
+++ b/llvm/include/llvm/ProfileData/MemProfData.inc
@@ -33,10 +33,10 @@
    (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | 
(uint64_t)129)
 
 // The version number of the raw binary format.
-#define MEMPROF_RAW_VERSION 5ULL
+#define MEMPROF_RAW_VERSION 6ULL
 
 // Currently supported versions.
-#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL}
+#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL, 6ULL}
 
 #define MEMPROF_V3_MIB_SIZE 132ULL;
 
@@ -52,6 +52,8 @@ PACKED(struct Header {
   uint64_t SegmentOffset;
   uint64_t MIBOffset;
   uint64_t StackOffset;
+  // Added in V6: offset to memory block address section.
+  uint64_t MemAddressOffset;
 });
 
 // A struct describing the information necessary to describe a /proc/maps
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 8fdae7a472d5f..de86af824c306 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -156,6 +156,12 @@ class LLVM_ABI RawMemProfReader final : public MemProfReader {
       report_fatal_error(std::move(E));
   }
 
+  // Returns the memory block addresses collected while reading the raw
+  // profile (V6+). TODO: Use from llvm-profdata to map addresses to symbols.
+  const llvm::SmallVector<uint64_t> &getMemBlockAddresses() const {
+    return MemBlockAddresses;
+  }
+
 private:
   RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
       : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
@@ -202,6 +208,9 @@ class LLVM_ABI RawMemProfReader final : public MemProfReader {
   llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
   CallStackMap StackMap;
 
+  // Memory block addresses recorded during profiling (V6+).
+  llvm::SmallVector<uint64_t> MemBlockAddresses;
+
   // Cached symbolization from PC to Frame.
   llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame;
 
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 3fc0dbfd8e69d..9188b5698aea8 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -213,6 +213,26 @@ readMemInfoBlocksV5(const char *Ptr) {
   return readMemInfoBlocksCommon(Ptr, /*IsHistogramEncoded=*/true);
 }
 
+// The V6 MIB encoding is unchanged from V5, so reuse the V5 reader.
+llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
+readMemInfoBlocksV6(const char *Ptr) {
+  return readMemInfoBlocksV5(Ptr);
+}
+
+// Reads a little-endian uint64_t entry count at Ptr, followed by that many
+// little-endian uint64_t addresses, and returns them in stored order.
+llvm::SmallVector<uint64_t> readMemBlockAddresses(const char *Ptr) {
+  using namespace support;
+  constexpr llvm::endianness LE = llvm::endianness::little;
+
+  uint64_t Remaining = endian::readNext<uint64_t, LE>(Ptr);
+  llvm::SmallVector<uint64_t> Result;
+  Result.reserve(Remaining);
+  for (; Remaining != 0; --Remaining)
+    Result.push_back(endian::readNext<uint64_t, LE>(Ptr));
+  return Result;
+}
+
 CallStackMap readStackInfo(const char *Ptr) {
   using namespace support;
 
@@ -261,6 +281,7 @@ bool isRuntimePath(const StringRef Path) {
   // This list should be updated in case new files with additional interceptors
   // are added to the memprof runtime.
   return Filename == "memprof_malloc_linux.cpp" ||
+         Filename == "memprof_malloc_mac.cpp" ||
          Filename == "memprof_interceptors.cpp" ||
          Filename == "memprof_new_delete.cpp";
 }
@@ -707,6 +728,8 @@ RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
     return readMemInfoBlocksV4(Ptr);
   if (MemprofRawVersion == 5ULL)
     return readMemInfoBlocksV5(Ptr);
+  if (MemprofRawVersion == 6ULL)
+    return readMemInfoBlocksV6(Ptr);
   llvm_unreachable(
       "Panic: Unsupported version number when reading MemInfoBlocks");
 }
@@ -773,6 +796,14 @@ Error RawMemProfReader::readRawProfile(
             "memprof raw profile got different call stack for same id");
     }
 
+    // Read in memory block addresses for V6+.
+    if (MemprofRawVersion >= 6ULL) {
+      for (const auto &Addr :
+           readMemBlockAddresses(Next + Header->MemAddressOffset)) {
+        MemBlockAddresses.push_back(Addr);
+      }
+    }
+
     Next += Header->TotalSize;
   }
 
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
index 05616d81dbe5f..1da8a4b73f26a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
@@ -71,6 +71,8 @@ constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";
 
 constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";
 
+constexpr char MemProfFineGranularityFlagVar[] = "__memprof_fine_granularity";
+
 // Command-line flags.
 
 static cl::opt<bool> ClInsertVersionCheck(
@@ -138,6 +140,12 @@ static cl::opt<bool> ClHistogram("memprof-histogram",
                                  cl::desc("Collect access count histograms"),
                                  cl::Hidden, cl::init(false));
 
+static cl::opt<bool> ClFineGranularity(
+    "memprof-fine-granularity",
+    cl::desc("Use fine shadow granularity (8 bytes) without collecting "
+             "histograms"),
+    cl::Hidden, cl::init(false));
+
 static cl::opt<std::string>
     MemprofRuntimeDefaultOptions("memprof-runtime-default-options",
                                  cl::desc("The default memprof options"),
@@ -156,7 +164,8 @@ namespace {
 struct ShadowMapping {
   ShadowMapping() {
     Scale = ClMappingScale;
-    Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity;
+    Granularity = (ClHistogram || ClFineGranularity) ? HistogramGranularity
+                                                     : ClMappingGranularity;
     Mask = ~(Granularity - 1);
   }
 
@@ -240,8 +249,15 @@ MemProfilerPass::MemProfilerPass() = default;
 
 PreservedAnalyses MemProfilerPass::run(Function &F,
                                        AnalysisManager<Function> &AM) {
-  assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) &&
-         "Memprof with histogram only supports default mapping granularity");
+  if (ClHistogram && ClMappingGranularity != DefaultMemGranularity)
+    report_fatal_error(
+        "Memprof with histogram only supports default mapping granularity");
+  if (ClFineGranularity && ClMappingGranularity != DefaultMemGranularity)
+    report_fatal_error("Memprof with fine granularity only supports default "
+                       "mapping granularity");
+  if (ClHistogram && ClFineGranularity)
+    report_fatal_error(
+        "Cannot use both -memprof-histogram and -memprof-fine-granularity");
   Module &M = *F.getParent();
   MemProfiler Profiler(M);
   if (Profiler.instrumentFunction(F))
@@ -451,14 +467,16 @@ void MemProfiler::instrumentAddress(Instruction *OrigIns,
     return;
   }
 
-  Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C);
+  Type *ShadowTy = (ClHistogram || ClFineGranularity) ? Type::getInt8Ty(*C)
+                                                      : Type::getInt64Ty(*C);
   Type *ShadowPtrTy = PointerType::get(*C, 0);
 
   Value *ShadowPtr = memToShadow(AddrLong, IRB);
   Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
   Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
-  // If we are profiling with histograms, add overflow protection at 255.
-  if (ClHistogram) {
+  // If we are using fine granularity shadow (histogram or fine-granularity
+  // mode), add overflow protection at 255.
+  if (ClHistogram || ClFineGranularity) {
     Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255);
     Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount);
     Instruction *IncBlock =
@@ -506,6 +524,23 @@ void createMemprofHistogramFlagVar(Module &M) {
   appendToCompilerUsed(M, MemprofHistogramFlag);
 }
 
+// Emits the constant i1 global __memprof_fine_granularity, initialized from
+// -memprof-fine-granularity. This tells the runtime to use fine (8-byte)
+// shadow granularity without collecting histograms.
+void createMemprofFineGranularityFlagVar(Module &M) {
+  const StringRef Name(MemProfFineGranularityFlagVar);
+  Type *BoolTy = Type::getInt1Ty(M.getContext());
+  auto *Flag = new GlobalVariable(
+      M, BoolTy, /*isConstant=*/true, GlobalValue::WeakAnyLinkage,
+      Constant::getIntegerValue(BoolTy, APInt(1, ClFineGranularity)), Name);
+  // On COMDAT targets, use external linkage in a same-named COMDAT.
+  if (M.getTargetTriple().supportsCOMDAT()) {
+    Flag->setLinkage(GlobalValue::ExternalLinkage);
+    Flag->setComdat(M.getOrInsertComdat(Name));
+  }
+  appendToCompilerUsed(M, Flag);
+}
+
 void createMemprofDefaultOptionsVar(Module &M) {
   Constant *OptionsConst = ConstantDataArray::getString(
       M.getContext(), MemprofRuntimeDefaultOptions, /*AddNull=*/true);
@@ -539,6 +574,8 @@ bool ModuleMemProfiler::instrumentModule(Module &M) {
 
   createMemprofHistogramFlagVar(M);
 
+  createMemprofFineGranularityFlagVar(M);
+
   createMemprofDefaultOptionsVar(M);
 
   return true;
@@ -549,7 +586,8 @@ void MemProfiler::initializeCallbacks(Module &M) {
 
   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
     const std::string TypeStr = AccessIsWrite ? "store" : "load";
-    const std::string HistPrefix = ClHistogram ? "hist_" : "";
+    const std::string HistPrefix =
+        (ClHistogram || ClFineGranularity) ? "hist_" : "";
 
     SmallVector<Type *, 2> Args1{1, IntptrTy};
     MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to