https://github.com/SharonXSharon updated https://github.com/llvm/llvm-project/pull/182224
>From cc9b378b53771dba5ba71124e91b121c7145c4bf Mon Sep 17 00:00:00 2001 From: Sharon Xu <[email protected]> Date: Wed, 18 Feb 2026 13:49:49 -0800 Subject: [PATCH] [memprof] Add Darwin platform support, fine-granularity shadow, and V6 raw profile format Add memprof support for Darwin/Apple platforms, decouple shadow memory granularity from histogram collection, and extend the raw profile format with a memory block address section. Darwin platform support: - Add `ARM64` to `ALL_MEMPROF_SUPPORTED_ARCH` in `AllSupportedArchDefs.cmake`; set `MEMPROF_SUPPORTED_OS` and add Darwin to the OS match in `config-ix.cmake`. - Restructure `CMakeLists.txt` following the asan pattern: shared-only runtime on Apple, static+shared on other platforms. - Add `memprof_mac.cpp` with `FindDynamicShadowStart()`, `InitializePlatformInterceptors()`, and `MemprofDlSymNext()`. - Add `memprof_malloc_mac.cpp` using `COMMON_MALLOC_*` macros from `sanitizer_malloc_mac.inc` for Darwin malloc zone interception. - Update `memprof_interceptors.h` to no-op `MEMPROF_INTERCEPT_FUNC` on Apple (DYLD interposition handles it). - Update `memprof_interceptors.cpp` with `SANITIZER_APPLE` paths in `COMMON_INTERCEPTOR_ENTER`, `strcpy`, `index`, and `__strdup`. - Add `aarch64` support to `memprof_allocator.h` and a `GetCpuId()` fallback in `memprof_allocator.cpp` for platforms without `sched_getcpu()` (`_os_cpu_number` resolved via `dlsym` on Apple, `-1` otherwise). - Change `memprof_linux.cpp` and `memprof_malloc_linux.cpp` from `#error` to `#if SANITIZER_LINUX` guards. - Update `lit.cfg.py` for Darwin test support (`DYLD_LIBRARY_PATH`, dylib naming, `-ldl` conditionalization). - Add `memprof_malloc_mac.cpp` to `isRuntimePath()` in `MemProfReader.cpp`. - Add Darwin-specific tests and platform guards for Linux-only tests (`stress_dtls.c`, `free_sized.cpp`). Shadow memory granularity refactoring: - Add `-memprof-fine-granularity` flag that enables 8-byte shadow granularity (same as `-memprof-histogram`) without collecting per-bucket histogram arrays. - Add `__memprof_fine_granularity` weak global variable for compile-to-runtime communication. 
- Update `ShadowMapping`, `instrumentAddress`, and `initializeCallbacks` to use fine granularity when either `ClHistogram` or `ClFineGranularity` is set. - Add `UseFineGranularity()` helper in the runtime; update `ClearShadow` and `CreateNewMIB` for three-way dispatch (histogram, fine-granularity, standard). Raw profile format V6: - Bump `MEMPROF_RAW_VERSION` to 6; add 6 to supported versions. - Add `MemAddressOffset` field to `Header` in `MemProfData.inc` (both LLVM and compiler-rt copies). - Add memory block address section: `NumEntries` (u64) followed by allocation addresses (u64 each). - Update `SerializeToRawProfile` to accept and serialize memory block addresses; collect live block addresses in `InsertLiveBlocks`. - Add `readMemBlockAddresses()` and `readMemInfoBlocksV6()` to `MemProfReader.cpp`; store addresses in `RawMemProfReader`. - Update `rawprofile.cpp` unit test for V6 header layout. --- clang/lib/Driver/ToolChains/Darwin.cpp | 5 + .../cmake/Modules/AllSupportedArchDefs.cmake | 2 +- compiler-rt/cmake/config-ix.cmake | 3 +- compiler-rt/include/profile/MemProfData.inc | 6 +- compiler-rt/lib/memprof/CMakeLists.txt | 178 ++++++++++-------- compiler-rt/lib/memprof/memprof_allocator.cpp | 97 +++++++++- compiler-rt/lib/memprof/memprof_allocator.h | 7 +- .../lib/memprof/memprof_interceptors.cpp | 29 ++- .../lib/memprof/memprof_interceptors.h | 9 +- .../lib/memprof/memprof_interface_internal.h | 3 + compiler-rt/lib/memprof/memprof_internal.h | 5 +- compiler-rt/lib/memprof/memprof_linux.cpp | 6 +- compiler-rt/lib/memprof/memprof_mac.cpp | 54 ++++++ .../lib/memprof/memprof_malloc_linux.cpp | 6 +- .../lib/memprof/memprof_malloc_mac.cpp | 77 ++++++++ .../lib/memprof/memprof_rawprofile.cpp | 50 ++++- compiler-rt/lib/memprof/memprof_rawprofile.h | 4 + compiler-rt/lib/memprof/memprof_rtl.cpp | 3 + compiler-rt/lib/memprof/tests/CMakeLists.txt | 31 ++- compiler-rt/lib/memprof/tests/rawprofile.cpp | 91 +++++---- compiler-rt/lib/memprof/weak_symbols.txt | 4 +- 
compiler-rt/test/memprof/CMakeLists.txt | 2 +- .../memprof/TestCases/Darwin/lit.local.cfg.py | 10 + .../memprof/TestCases/Darwin/malloc_zone.c | 30 +++ .../TestCases/Darwin/malloc_zone_allocators.c | 41 ++++ .../memprof/TestCases/Darwin/new_delete.cpp | 29 +++ .../memprof/TestCases/Darwin/raw_profile.cpp | 19 ++ .../TestCases/Darwin/symbolize_raw_stacks.py | 63 +++++++ .../test/memprof/TestCases/free_sized.cpp | 1 + .../test/memprof/TestCases/stress_dtls.c | 1 + .../test/memprof/Unit/lit.site.cfg.py.in | 5 +- compiler-rt/test/memprof/lit.cfg.py | 36 ++-- llvm/include/llvm/ProfileData/MemProfData.inc | 6 +- llvm/include/llvm/ProfileData/MemProfReader.h | 9 + llvm/lib/ProfileData/MemProfReader.cpp | 31 +++ .../MemProfInstrumentation.cpp | 52 ++++- 36 files changed, 827 insertions(+), 178 deletions(-) create mode 100644 compiler-rt/lib/memprof/memprof_mac.cpp create mode 100644 compiler-rt/lib/memprof/memprof_malloc_mac.cpp create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/lit.local.cfg.py create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/malloc_zone.c create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/malloc_zone_allocators.c create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/new_delete.cpp create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/raw_profile.cpp create mode 100644 compiler-rt/test/memprof/TestCases/Darwin/symbolize_raw_stacks.py diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 74fcb10c0be22..b172c83038ae4 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1669,6 +1669,11 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args, AddLinkRuntimeLib(Args, CmdArgs, "stats_client", RLO_AlwaysLink); AddLinkSanitizerLibArgs(Args, CmdArgs, "stats"); } + if (Sanitize.needsMemProfRt()) { + assert(Sanitize.needsSharedRt() && + "Static sanitizer runtimes not supported"); + AddLinkSanitizerLibArgs(Args, CmdArgs, 
"memprof"); + } } if (Sanitize.needsMemProfRt()) diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index c2de0d0f652e8..c5642bdf3ca58 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -89,7 +89,7 @@ else() endif() set(ALL_NSAN_SUPPORTED_ARCH ${X86_64}) set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64} ${RISCV64}) -set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64}) +set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64} ${ARM64}) set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC64} ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${RISCV32} ${RISCV64} ${LOONGARCH64} ${WASM32}) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 1f82ff3cf7531..330663935d30c 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -463,6 +463,7 @@ if(APPLE) set(SANITIZER_COMMON_SUPPORTED_OS osx) set(PROFILE_SUPPORTED_OS osx) + set(MEMPROF_SUPPORTED_OS osx) set(TSAN_SUPPORTED_OS osx) set(TYSAN_SUPPORTED_OS osx) set(XRAY_SUPPORTED_OS osx) @@ -835,7 +836,7 @@ else() endif() if (COMPILER_RT_HAS_SANITIZER_COMMON AND MEMPROF_SUPPORTED_ARCH AND - OS_NAME MATCHES "Linux") + OS_NAME MATCHES "Darwin|Linux") set(COMPILER_RT_HAS_MEMPROF TRUE) else() set(COMPILER_RT_HAS_MEMPROF FALSE) diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc index 26badddae6f3a..5be4c0cc2802a 100644 --- a/compiler-rt/include/profile/MemProfData.inc +++ b/compiler-rt/include/profile/MemProfData.inc @@ -33,10 +33,10 @@ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) // The version number of the raw binary format. -#define MEMPROF_RAW_VERSION 5ULL +#define MEMPROF_RAW_VERSION 6ULL // Currently supported versions. 
-#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL} +#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL, 6ULL} #define MEMPROF_V3_MIB_SIZE 132ULL; @@ -52,6 +52,8 @@ PACKED(struct Header { uint64_t SegmentOffset; uint64_t MIBOffset; uint64_t StackOffset; + // Added in V6: offset to memory block address section. + uint64_t MemAddressOffset; }); // A struct describing the information necessary to describe a /proc/maps diff --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt index e6d99daca6ee7..30d8fa6885b45 100644 --- a/compiler-rt/lib/memprof/CMakeLists.txt +++ b/compiler-rt/lib/memprof/CMakeLists.txt @@ -7,7 +7,9 @@ set(MEMPROF_SOURCES memprof_interceptors.cpp memprof_interceptors_memintrinsics.cpp memprof_linux.cpp + memprof_mac.cpp memprof_malloc_linux.cpp + memprof_malloc_mac.cpp memprof_mibmap.cpp memprof_posix.cpp memprof_rawprofile.cpp @@ -78,7 +80,7 @@ append_list_if(COMPILER_RT_HAS_LIBLOG log MEMPROF_DYNAMIC_LIBS) # Compile MemProf sources into an object library. add_compiler_rt_object_libraries(RTMemprof_dynamic - OS ${SANITIZER_COMMON_SUPPORTED_OS} + OS ${MEMPROF_SUPPORTED_OS} ARCHS ${MEMPROF_SUPPORTED_ARCH} SOURCES ${MEMPROF_SOURCES} ${MEMPROF_CXX_SOURCES} ADDITIONAL_HEADERS ${MEMPROF_HEADERS} @@ -86,6 +88,7 @@ add_compiler_rt_object_libraries(RTMemprof_dynamic DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} DEPS ${MEMPROF_DEPS}) +if(NOT APPLE) add_compiler_rt_object_libraries(RTMemprof ARCHS ${MEMPROF_SUPPORTED_ARCH} SOURCES ${MEMPROF_SOURCES} @@ -115,94 +118,115 @@ add_compiler_rt_object_libraries(RTMemprof_dynamic_version_script_dummy CFLAGS ${MEMPROF_DYNAMIC_CFLAGS} DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} DEPS ${MEMPROF_DEPS}) +endif() # Build MemProf runtimes shipped with Clang. add_compiler_rt_component(memprof) -# Build separate libraries for each target. 
- -set(MEMPROF_COMMON_RUNTIME_OBJECT_LIBS - RTInterception - RTSanitizerCommon - RTSanitizerCommonLibc - RTSanitizerCommonCoverage - RTSanitizerCommonSymbolizer - # FIXME: hangs. - # RTSanitizerCommonSymbolizerInternal -) - -add_compiler_rt_runtime(clang_rt.memprof - STATIC - ARCHS ${MEMPROF_SUPPORTED_ARCH} - OBJECT_LIBS RTMemprof_preinit - RTMemprof - ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS} - CFLAGS ${MEMPROF_CFLAGS} - DEFS ${MEMPROF_COMMON_DEFINITIONS} - PARENT_TARGET memprof) - -add_compiler_rt_runtime(clang_rt.memprof_cxx - STATIC - ARCHS ${MEMPROF_SUPPORTED_ARCH} - OBJECT_LIBS RTMemprof_cxx - CFLAGS ${MEMPROF_CFLAGS} - DEFS ${MEMPROF_COMMON_DEFINITIONS} - PARENT_TARGET memprof) - -add_compiler_rt_runtime(clang_rt.memprof-preinit - STATIC - ARCHS ${MEMPROF_SUPPORTED_ARCH} - OBJECT_LIBS RTMemprof_preinit - CFLAGS ${MEMPROF_CFLAGS} - DEFS ${MEMPROF_COMMON_DEFINITIONS} - PARENT_TARGET memprof) - -foreach(arch ${MEMPROF_SUPPORTED_ARCH}) - if (UNIX) - add_sanitizer_rt_version_list(clang_rt.memprof-dynamic-${arch} - LIBS clang_rt.memprof-${arch} clang_rt.memprof_cxx-${arch} - EXTRA memprof.syms.extra) - set(VERSION_SCRIPT_FLAG - -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers) - set_property(SOURCE - ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp - APPEND PROPERTY - OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers) - else() - set(VERSION_SCRIPT_FLAG) - endif() - - set(MEMPROF_DYNAMIC_WEAK_INTERCEPTION) +if(APPLE) + add_weak_symbols("memprof" WEAK_SYMBOL_LINK_FLAGS) + add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS) add_compiler_rt_runtime(clang_rt.memprof SHARED - ARCHS ${arch} - OBJECT_LIBS ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS} - RTMemprof_dynamic - # The only purpose of RTMemprof_dynamic_version_script_dummy is to - # carry a dependency of the shared runtime on the version script. 
- # Replacing it with a straightforward - # add_dependencies(clang_rt.memprof-dynamic-${arch} clang_rt.memprof-dynamic-${arch}-version-list) - # generates an order-only dependency in ninja. - RTMemprof_dynamic_version_script_dummy - ${MEMPROF_DYNAMIC_WEAK_INTERCEPTION} + OS ${MEMPROF_SUPPORTED_OS} + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_dynamic + RTInterception + RTSanitizerCommon + RTSanitizerCommonLibc + RTSanitizerCommonCoverage + RTSanitizerCommonSymbolizer CFLAGS ${MEMPROF_DYNAMIC_CFLAGS} - LINK_FLAGS ${MEMPROF_DYNAMIC_LINK_FLAGS} - ${VERSION_SCRIPT_FLAG} - LINK_LIBS ${MEMPROF_DYNAMIC_LIBS} + LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS} DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} PARENT_TARGET memprof) +else() + # Build separate libraries for each target. + + set(MEMPROF_COMMON_RUNTIME_OBJECT_LIBS + RTInterception + RTSanitizerCommon + RTSanitizerCommonLibc + RTSanitizerCommonCoverage + RTSanitizerCommonSymbolizer + # FIXME: hangs. + # RTSanitizerCommonSymbolizerInternal + ) + + add_compiler_rt_runtime(clang_rt.memprof + STATIC + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_preinit + RTMemprof + ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS} + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + PARENT_TARGET memprof) - if (SANITIZER_USE_SYMBOLS) - add_sanitizer_rt_symbols(clang_rt.memprof_cxx - ARCHS ${arch}) - add_dependencies(memprof clang_rt.memprof_cxx-${arch}-symbols) - add_sanitizer_rt_symbols(clang_rt.memprof + add_compiler_rt_runtime(clang_rt.memprof_cxx + STATIC + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_cxx + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + PARENT_TARGET memprof) + + add_compiler_rt_runtime(clang_rt.memprof-preinit + STATIC + ARCHS ${MEMPROF_SUPPORTED_ARCH} + OBJECT_LIBS RTMemprof_preinit + CFLAGS ${MEMPROF_CFLAGS} + DEFS ${MEMPROF_COMMON_DEFINITIONS} + PARENT_TARGET memprof) + + foreach(arch ${MEMPROF_SUPPORTED_ARCH}) + if (UNIX) + 
add_sanitizer_rt_version_list(clang_rt.memprof-dynamic-${arch} + LIBS clang_rt.memprof-${arch} clang_rt.memprof_cxx-${arch} + EXTRA memprof.syms.extra) + set(VERSION_SCRIPT_FLAG + -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers) + set_property(SOURCE + ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp + APPEND PROPERTY + OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang_rt.memprof-dynamic-${arch}.vers) + else() + set(VERSION_SCRIPT_FLAG) + endif() + + set(MEMPROF_DYNAMIC_WEAK_INTERCEPTION) + + add_compiler_rt_runtime(clang_rt.memprof + SHARED ARCHS ${arch} - EXTRA memprof.syms.extra) - add_dependencies(memprof clang_rt.memprof-${arch}-symbols) - endif() -endforeach() + OBJECT_LIBS ${MEMPROF_COMMON_RUNTIME_OBJECT_LIBS} + RTMemprof_dynamic + # The only purpose of RTMemprof_dynamic_version_script_dummy is to + # carry a dependency of the shared runtime on the version script. + # Replacing it with a straightforward + # add_dependencies(clang_rt.memprof-dynamic-${arch} clang_rt.memprof-dynamic-${arch}-version-list) + # generates an order-only dependency in ninja. 
+ RTMemprof_dynamic_version_script_dummy + ${MEMPROF_DYNAMIC_WEAK_INTERCEPTION} + CFLAGS ${MEMPROF_DYNAMIC_CFLAGS} + LINK_FLAGS ${MEMPROF_DYNAMIC_LINK_FLAGS} + ${VERSION_SCRIPT_FLAG} + LINK_LIBS ${MEMPROF_DYNAMIC_LIBS} + DEFS ${MEMPROF_DYNAMIC_DEFINITIONS} + PARENT_TARGET memprof) + + if (SANITIZER_USE_SYMBOLS) + add_sanitizer_rt_symbols(clang_rt.memprof_cxx + ARCHS ${arch}) + add_dependencies(memprof clang_rt.memprof_cxx-${arch}-symbols) + add_sanitizer_rt_symbols(clang_rt.memprof + ARCHS ${arch} + EXTRA memprof.syms.extra) + add_dependencies(memprof clang_rt.memprof-${arch}-symbols) + endif() + endforeach() +endif() if(COMPILER_RT_INCLUDE_TESTS) diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp index 60f5c853f9d76..9bdf4d4e89383 100644 --- a/compiler-rt/lib/memprof/memprof_allocator.cpp +++ b/compiler-rt/lib/memprof/memprof_allocator.cpp @@ -31,17 +31,28 @@ #include "sanitizer_common/sanitizer_internal_defs.h" #include "sanitizer_common/sanitizer_stackdepot.h" +#if SANITIZER_LINUX #include <sched.h> +#elif SANITIZER_APPLE +#include <dlfcn.h> +#endif #include <time.h> #define MAX_HISTOGRAM_PRINT_SIZE 32U extern bool __memprof_histogram; +extern bool __memprof_fine_granularity; namespace __memprof { namespace { using ::llvm::memprof::MemInfoBlock; +// Returns true if the shadow uses fine (8-byte) granularity, which is the +// case for both histogram mode and fine-granularity mode. +static bool UseFineGranularity() { + return __memprof_histogram || __memprof_fine_granularity; +} + void Print(const MemInfoBlock &M, const u64 id, bool print_terse) { u64 p; @@ -84,13 +95,26 @@ void Print(const MemInfoBlock &M, const u64 id, bool print_terse) { } } // namespace +#if SANITIZER_APPLE +using OsCpuNumberFn = int (*)(); +static OsCpuNumberFn os_cpu_number_fn = nullptr; +#endif + static int GetCpuId(void) { // _memprof_preinit is called via the preinit_array, which subsequently calls // malloc. 
Since this is before _dl_init calls VDSO_SETUP, sched_getcpu // will seg fault as the address of __vdso_getcpu will be null. if (!memprof_inited) return -1; +#if SANITIZER_LINUX return sched_getcpu(); +#elif SANITIZER_APPLE + if (os_cpu_number_fn) + return os_cpu_number_fn(); + return -1; +#else + return -1; +#endif } // Compute the timestamp in ms. @@ -248,7 +272,7 @@ void ClearShadow(uptr addr, uptr size) { CHECK(REAL(memset)); uptr shadow_beg; uptr shadow_end; - if (__memprof_histogram) { + if (UseFineGranularity()) { shadow_beg = HISTOGRAM_MEM_TO_SHADOW(addr); shadow_end = HISTOGRAM_MEM_TO_SHADOW(addr + size); } else { @@ -314,6 +338,13 @@ struct Allocator { static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) { if (__memprof_histogram) { return CreateNewMIBWithHistogram(p, m, user_size); + } else if (__memprof_fine_granularity) { + // Fine granularity uses histogram-style shadow layout for counting, + // but does not collect per-bucket histograms. + u64 c = GetShadowCountHistogram(p, user_size); + long curtime = GetTimestamp(); + return MemInfoBlock(user_size, c, m->timestamp_ms, curtime, m->cpu_id, + GetCpuId(), 0, 0); } else { return CreateNewMIBWithoutHistogram(p, m, user_size); } @@ -354,13 +385,38 @@ struct Allocator { allocator.ForceLock(); - InsertLiveBlocks(); + Vector<u64> MemBlockAddresses; + InsertLiveBlocks(MemBlockAddresses); if (flags()->print_text) { if (!flags()->print_terse) Printf("Recorded MIBs (incl. live on exit):\n"); MIBMap.ForEach(PrintCallback, reinterpret_cast<void *>(flags()->print_terse)); + // On Apple platforms, StackDepotPrintAll() calls + // StackTrace::Print() which triggers symbolization via dladdr(). + // During atexit, this can deadlock because dladdr() acquires dyld + // internal locks that may already be held. Instead, print raw + // unsymbolized addresses that can be post-processed offline with + // atos or llvm-symbolizer. 
+#if SANITIZER_APPLE + Vector<u64> StackIds; + MIBMap.ForEach( + [](const uptr Key, LockedMemInfoBlock *const &, void *Arg) { + auto *StackIds = reinterpret_cast<Vector<u64> *>(Arg); + StackIds->PushBack(Key); + }, + reinterpret_cast<void *>(&StackIds)); + for (uptr i = 0; i < StackIds.Size(); i++) { + u32 Id = static_cast<u32>(StackIds[i]); + StackTrace St = StackDepotGet(Id); + Printf("Stack for id %u:\n", Id); + for (u32 j = 0; j < St.size; j++) + Printf(" #%u 0x%zx\n", j, St.trace[j]); + Printf("\n"); + } +#else StackDepotPrintAll(); +#endif } else { // Serialize the contents to a raw profile. Format documented in // memprof_rawprofile.h. @@ -369,7 +425,8 @@ struct Allocator { __sanitizer::ListOfModules List; List.init(); ArrayRef<LoadedModule> Modules(List.begin(), List.end()); - u64 BytesSerialized = SerializeToRawProfile(MIBMap, Modules, Buffer); + u64 BytesSerialized = + SerializeToRawProfile(MIBMap, Modules, MemBlockAddresses, Buffer); CHECK(Buffer && BytesSerialized && "could not serialize to buffer"); report_file.Write(Buffer, BytesSerialized); } @@ -378,20 +435,27 @@ struct Allocator { } // Inserts any blocks which have been allocated but not yet deallocated. - void InsertLiveBlocks() { + // Also records their addresses in MemBlockAddresses. 
+ void InsertLiveBlocks(Vector<u64> &MemBlockAddresses) { + struct InsertLiveBlocksCtx { + Allocator *A; + Vector<u64> *Addrs; + }; + InsertLiveBlocksCtx Ctx{this, &MemBlockAddresses}; allocator.ForEachChunk( - [](uptr chunk, void *alloc) { + [](uptr chunk, void *arg) { + auto *Ctx = (InsertLiveBlocksCtx *)arg; u64 user_requested_size; - Allocator *A = (Allocator *)alloc; MemprofChunk *m = - A->GetMemprofChunk((void *)chunk, user_requested_size); + Ctx->A->GetMemprofChunk((void *)chunk, user_requested_size); if (!m) return; uptr user_beg = ((uptr)m) + kChunkHeaderSize; MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size); - InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap); + InsertOrMerge(m->alloc_context_id, newMIB, Ctx->A->MIBMap); + Ctx->Addrs->PushBack(static_cast<u64>(user_beg)); }, - this); + &Ctx); } void InitLinkerInitialized() { @@ -641,7 +705,12 @@ static Allocator instance(LINKER_INITIALIZED); static MemprofAllocator &get_allocator() { return instance.allocator; } -void InitializeAllocator() { instance.InitLinkerInitialized(); } +void InitializeAllocator() { + instance.InitLinkerInitialized(); +#if SANITIZER_APPLE + os_cpu_number_fn = (OsCpuNumberFn)dlsym(RTLD_DEFAULT, "_os_cpu_number"); +#endif +} void MemprofThreadLocalMallocStorage::CommitBack() { instance.CommitBack(this); @@ -766,6 +835,14 @@ uptr memprof_malloc_usable_size(const void *ptr) { return usable_size; } +uptr memprof_mz_size(const void *ptr) { + return memprof_malloc_usable_size(ptr); +} + +void memprof_mz_force_lock() { instance.ForceLock(); } + +void memprof_mz_force_unlock() { instance.ForceUnlock(); } + } // namespace __memprof // ---------------------- Interface ---------------- {{{1 diff --git a/compiler-rt/lib/memprof/memprof_allocator.h b/compiler-rt/lib/memprof/memprof_allocator.h index 6d898f06f7e42..99fdc4a325fbf 100644 --- a/compiler-rt/lib/memprof/memprof_allocator.h +++ b/compiler-rt/lib/memprof/memprof_allocator.h @@ -20,7 +20,7 @@ #include 
"sanitizer_common/sanitizer_allocator.h" #include "sanitizer_common/sanitizer_list.h" -#if !defined(__x86_64__) +#if !defined(__x86_64__) && !defined(__aarch64__) #error Unsupported platform #endif #if !SANITIZER_CAN_USE_ALLOCATOR64 @@ -103,5 +103,10 @@ uptr memprof_malloc_usable_size(const void *ptr); void PrintInternalAllocatorStats(); +// Mac-specific malloc zone functions. +uptr memprof_mz_size(const void *ptr); +void memprof_mz_force_lock(); +void memprof_mz_force_unlock(); + } // namespace __memprof #endif // MEMPROF_ALLOCATOR_H diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp index f4d7fd46e6198..c142a66d5c7f7 100644 --- a/compiler-rt/lib/memprof/memprof_interceptors.cpp +++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp @@ -63,9 +63,14 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *) #define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \ MEMPROF_INTERCEPTOR_ENTER(ctx, func); \ do { \ - if (memprof_init_is_running) \ - return REAL(func)(__VA_ARGS__); \ - ENSURE_MEMPROF_INITED(); \ + if constexpr (SANITIZER_APPLE) { \ + if (UNLIKELY(!memprof_inited)) \ + return REAL(func)(__VA_ARGS__); \ + } else { \ + if (memprof_init_is_running) \ + return REAL(func)(__VA_ARGS__); \ + ENSURE_MEMPROF_INITED(); \ + } \ } while (false) #define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path) \ do { \ @@ -168,8 +173,13 @@ INTERCEPTOR(int, pthread_join, void *t, void **arg) { DEFINE_INTERNAL_PTHREAD_FUNCTIONS +#if SANITIZER_APPLE +DECLARE_REAL(char *, index, const char *string, int c) +OVERRIDE_FUNCTION(index, strchr); +#else INTERCEPTOR(char *, index, const char *string, int c) ALIAS(WRAP(strchr)); +#endif // For both strcat() and strncat() we need to check the validity of |to| // argument irrespective of the |from| length. 
@@ -201,8 +211,13 @@ INTERCEPTOR(char *, strncat, char *to, const char *from, usize size) { INTERCEPTOR(char *, strcpy, char *to, const char *from) { void *ctx; MEMPROF_INTERCEPTOR_ENTER(ctx, strcpy); - if (memprof_init_is_running) { - return REAL(strcpy)(to, from); + if constexpr (SANITIZER_APPLE) { + if (UNLIKELY(!memprof_inited)) + return REAL(strcpy)(to, from); + } else { + if (memprof_init_is_running) { + return REAL(strcpy)(to, from); + } } ENSURE_MEMPROF_INITED(); uptr from_size = internal_strlen(from) + 1; @@ -225,6 +240,7 @@ INTERCEPTOR(char *, strdup, const char *s) { return reinterpret_cast<char *>(new_mem); } +#if SANITIZER_LINUX INTERCEPTOR(char *, __strdup, const char *s) { void *ctx; MEMPROF_INTERCEPTOR_ENTER(ctx, strdup); @@ -238,6 +254,7 @@ INTERCEPTOR(char *, __strdup, const char *s) { REAL(memcpy)(new_mem, s, length + 1); return reinterpret_cast<char *>(new_mem); } +#endif // SANITIZER_LINUX INTERCEPTOR(char *, strncpy, char *to, const char *from, usize size) { void *ctx; @@ -320,7 +337,9 @@ void InitializeMemprofInterceptors() { MEMPROF_INTERCEPT_FUNC(strncat); MEMPROF_INTERCEPT_FUNC(strncpy); MEMPROF_INTERCEPT_FUNC(strdup); +#if SANITIZER_LINUX MEMPROF_INTERCEPT_FUNC(__strdup); +#endif MEMPROF_INTERCEPT_FUNC(index); MEMPROF_INTERCEPT_FUNC(atoi); diff --git a/compiler-rt/lib/memprof/memprof_interceptors.h b/compiler-rt/lib/memprof/memprof_interceptors.h index 53d685706b849..5b21cae811bcf 100644 --- a/compiler-rt/lib/memprof/memprof_interceptors.h +++ b/compiler-rt/lib/memprof/memprof_interceptors.h @@ -40,10 +40,11 @@ DECLARE_REAL(char *, strncpy, char *to, const char *from, SIZE_T size) DECLARE_REAL(SIZE_T, strnlen, const char *s, SIZE_T maxlen) DECLARE_REAL(char *, strstr, const char *s1, const char *s2) +#if !SANITIZER_APPLE #define MEMPROF_INTERCEPT_FUNC(name) \ do { \ if (!INTERCEPT_FUNCTION(name)) \ - VReport(1, "MemProfiler: failed to intercept '%s'\n'", #name); \ + VReport(1, "MemProfiler: failed to intercept '%s'\n", #name); \ } while 
(0) #define MEMPROF_INTERCEPT_FUNC_VER(name, ver) \ do { \ @@ -56,6 +57,12 @@ DECLARE_REAL(char *, strstr, const char *s1, const char *s2) VReport(1, "MemProfiler: failed to intercept '%s@@%s' or '%s'\n", #name, \ ver, #name); \ } while (0) +#else +// OS X interceptors don't need to be initialized with INTERCEPT_FUNCTION. +#define MEMPROF_INTERCEPT_FUNC(name) +#define MEMPROF_INTERCEPT_FUNC_VER(name, ver) +#define MEMPROF_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) +#endif // !SANITIZER_APPLE #define MEMPROF_INTERCEPTOR_ENTER(ctx, func) \ ctx = 0; \ diff --git a/compiler-rt/lib/memprof/memprof_interface_internal.h b/compiler-rt/lib/memprof/memprof_interface_internal.h index 1fd07481a354d..648501c3ad4d1 100644 --- a/compiler-rt/lib/memprof/memprof_interface_internal.h +++ b/compiler-rt/lib/memprof/memprof_interface_internal.h @@ -61,6 +61,9 @@ SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE extern char SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE extern bool __memprof_histogram; +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE extern bool + __memprof_fine_granularity; + SANITIZER_INTERFACE_ATTRIBUTE int __memprof_profile_dump(); SANITIZER_INTERFACE_ATTRIBUTE void __memprof_profile_reset(); diff --git a/compiler-rt/lib/memprof/memprof_internal.h b/compiler-rt/lib/memprof/memprof_internal.h index ec9fa10badecd..bf896d60baf3b 100644 --- a/compiler-rt/lib/memprof/memprof_internal.h +++ b/compiler-rt/lib/memprof/memprof_internal.h @@ -52,11 +52,12 @@ void PrintAddressSpaceLayout(); // memprof_shadow_setup.cpp void InitializeShadowMemory(); -// memprof_malloc_linux.cpp +// memprof_malloc_linux.cpp / memprof_malloc_mac.cpp void ReplaceSystemMalloc(); -// memprof_linux.cpp +// memprof_linux.cpp / memprof_mac.cpp uptr FindDynamicShadowStart(); +void InitializePlatformExceptionHandlers(); // memprof_thread.cpp MemprofThread *CreateMainThread(); diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp 
index fbe5d250f840b..1ac82be202d30 100644 --- a/compiler-rt/lib/memprof/memprof_linux.cpp +++ b/compiler-rt/lib/memprof/memprof_linux.cpp @@ -12,9 +12,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if !SANITIZER_LINUX -#error Unsupported OS -#endif +#if SANITIZER_LINUX #include "memprof_interceptors.h" #include "memprof_internal.h" @@ -65,3 +63,5 @@ uptr FindDynamicShadowStart() { void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); } } // namespace __memprof + +#endif // SANITIZER_LINUX diff --git a/compiler-rt/lib/memprof/memprof_mac.cpp b/compiler-rt/lib/memprof/memprof_mac.cpp new file mode 100644 index 0000000000000..ece8d966db8ac --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_mac.cpp @@ -0,0 +1,54 @@ +//===-- memprof_mac.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Mac-specific details. 
+//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" +#if SANITIZER_APPLE + +#include "memprof_interceptors.h" +#include "memprof_internal.h" +#include "memprof_mapping.h" +#include "memprof_stack.h" +#include "memprof_thread.h" +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_mac.h" + +#include <dlfcn.h> +#include <fcntl.h> +#include <libkern/OSAtomic.h> +#include <mach-o/dyld.h> +#include <pthread.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/sysctl.h> +#include <sys/ucontext.h> +#include <unistd.h> + +namespace __memprof { + +void InitializePlatformInterceptors() {} +void InitializePlatformExceptionHandlers() {} + +uptr FindDynamicShadowStart() { + uptr shadow_size_bytes = MemToShadowSize(kHighMemEnd); + return MapDynamicShadow(shadow_size_bytes, SHADOW_SCALE, + /*min_shadow_base_alignment*/ 0, kHighMemEnd, + GetMmapGranularity()); +} + +void *MemprofDlSymNext(const char *sym) { return dlsym(RTLD_NEXT, sym); } + +} // namespace __memprof + +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/memprof/memprof_malloc_linux.cpp b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp index 68fe65475889a..d04d9ab26a158 100644 --- a/compiler-rt/lib/memprof/memprof_malloc_linux.cpp +++ b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp @@ -14,9 +14,7 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_platform.h" -#if !SANITIZER_LINUX -#error Unsupported OS -#endif +#if SANITIZER_LINUX #include "memprof_allocator.h" #include "memprof_interceptors.h" @@ -163,3 +161,5 @@ INTERCEPTOR(void, malloc_stats, void) { __memprof_print_accumulated_stats(); } namespace __memprof { void ReplaceSystemMalloc() {} } // namespace __memprof + +#endif // SANITIZER_LINUX diff --git 
a/compiler-rt/lib/memprof/memprof_malloc_mac.cpp b/compiler-rt/lib/memprof/memprof_malloc_mac.cpp new file mode 100644 index 0000000000000..ce0fc40c2a156 --- /dev/null +++ b/compiler-rt/lib/memprof/memprof_malloc_mac.cpp @@ -0,0 +1,77 @@ +//===-- memprof_malloc_mac.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemProfiler, a memory profiler. +// +// Mac-specific malloc interception. +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" +#if SANITIZER_APPLE + +#include "memprof_allocator.h" +#include "memprof_interceptors.h" +#include "memprof_internal.h" +#include "memprof_stack.h" + +using namespace __memprof; +#define COMMON_MALLOC_ZONE_NAME "memprof" +#define COMMON_MALLOC_ENTER() \ + do { \ + MemprofInitFromRtl(); \ + } while (false) +#define COMMON_MALLOC_SANITIZER_INITIALIZED memprof_inited +#define COMMON_MALLOC_FORCE_LOCK() memprof_mz_force_lock() +#define COMMON_MALLOC_FORCE_UNLOCK() memprof_mz_force_unlock() +#define COMMON_MALLOC_MEMALIGN(alignment, size) \ + GET_STACK_TRACE_MALLOC; \ + void *p = memprof_memalign(alignment, size, &stack, FROM_MALLOC) +#define COMMON_MALLOC_MALLOC(size) \ + GET_STACK_TRACE_MALLOC; \ + void *p = memprof_malloc(size, &stack) +#define COMMON_MALLOC_REALLOC(ptr, size) \ + GET_STACK_TRACE_MALLOC; \ + void *p = memprof_realloc(ptr, size, &stack); +#define COMMON_MALLOC_CALLOC(count, size) \ + GET_STACK_TRACE_MALLOC; \ + void *p = memprof_calloc(count, size, &stack); +#define COMMON_MALLOC_POSIX_MEMALIGN(memptr, alignment, size) \ + GET_STACK_TRACE_MALLOC; \ + int res = memprof_posix_memalign(memptr, alignment, size, 
&stack); +#define COMMON_MALLOC_VALLOC(size) \ + GET_STACK_TRACE_MALLOC; \ + void *p = memprof_memalign(GetPageSizeCached(), size, &stack, FROM_MALLOC); +#define COMMON_MALLOC_FREE(ptr) \ + GET_STACK_TRACE_FREE; \ + memprof_free(ptr, &stack, FROM_MALLOC); +#define COMMON_MALLOC_SIZE(ptr) uptr size = memprof_mz_size(ptr); +#define COMMON_MALLOC_FILL_STATS(zone, stats) +#define COMMON_MALLOC_REPORT_UNKNOWN_REALLOC(ptr, zone_ptr, zone_name) +#define COMMON_MALLOC_NAMESPACE __memprof +#define COMMON_MALLOC_HAS_ZONE_ENUMERATOR 0 +#define COMMON_MALLOC_HAS_EXTRA_INTROSPECTION_INIT 0 + +#include "sanitizer_common/sanitizer_malloc_mac.inc" + +namespace COMMON_MALLOC_NAMESPACE { + +bool HandleDlopenInit() { + static_assert(SANITIZER_SUPPORTS_INIT_FOR_DLOPEN, + "Expected SANITIZER_SUPPORTS_INIT_FOR_DLOPEN to be true"); + auto init_str = GetEnv("APPLE_MEMPROF_INIT_FOR_DLOPEN"); + if (!init_str) + return false; + if (internal_strncmp(init_str, "1", 1) != 0) + return false; + InitMallocZoneFields(); + return true; +} + +} // namespace COMMON_MALLOC_NAMESPACE + +#endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp b/compiler-rt/lib/memprof/memprof_rawprofile.cpp index f579e12b15d0a..0af57e1c5ea96 100644 --- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp +++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp @@ -189,6 +189,30 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds, CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) && "Expected num bytes != actual bytes written"); } + +// The memory address section uses the following format: +// ---------- Mem Address Info +// Num Entries (u64) +// ---------- Address Entry +// Address (u64) +// ---------- +// ... 
+u64 MemAddressSizeBytes(const Vector<u64> &Addresses) { + return sizeof(u64) + Addresses.Size() * sizeof(u64); +} + +void SerializeMemAddressesToBuffer(const Vector<u64> &Addresses, + const u64 ExpectedNumBytes, char *&Buffer) { + char *Ptr = Buffer; + const u64 NumEntries = Addresses.Size(); + Ptr = WriteBytes(NumEntries, Ptr); + for (u64 i = 0; i < NumEntries; i++) { + Ptr = WriteBytes(Addresses[i], Ptr); + } + CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) && + "Expected num bytes != actual bytes written"); +} + } // namespace // Format @@ -199,6 +223,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds, // Segment Offset // MIB Info Offset // Stack Offset +// Mem Address Offset // ---------- Segment Info // Num Entries // ---------- Segment Entry @@ -234,9 +259,16 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds, // ... // ---------- // Optional Padding Bytes +// ---------- Mem Address Info (V6+) +// Num Entries +// ---------- Address Entry +// Address (u64) +// ---------- +// ... +// Optional Padding Bytes // ... u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules, - char *&Buffer) { + const Vector<u64> &MemBlockAddresses, char *&Buffer) { // Each section size is rounded up to 8b since the first entry in each section // is a u64 which holds the number of entries in the section by convention. const u64 NumSegmentBytes = RoundUpTo(SegmentSizeBytes(Modules), 8); @@ -262,25 +294,32 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules, const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8); + const u64 NumMemAddressBytes = + RoundUpTo(MemAddressSizeBytes(MemBlockAddresses), 8); + // Ensure that the profile is 8b aligned. We allow for some optional padding // at the end so that any subsequent profile serialized to the same file does // not incur unaligned accesses.
const u64 TotalSizeBytes = RoundUpTo(sizeof(Header) + NumSegmentBytes + NumStackBytes + - NumMIBInfoBytes + NumHistogramBytes, + NumMIBInfoBytes + NumHistogramBytes + NumMemAddressBytes, 8); // Allocate the memory for the entire buffer incl. info blocks. Buffer = (char *)InternalAlloc(TotalSizeBytes); char *Ptr = Buffer; + const u64 StackOffset = + sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes + NumHistogramBytes; + const u64 MemAddressOffset = StackOffset + NumStackBytes; + Header header{MEMPROF_RAW_MAGIC_64, MEMPROF_RAW_VERSION, static_cast<u64>(TotalSizeBytes), sizeof(Header), sizeof(Header) + NumSegmentBytes, - sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes + - NumHistogramBytes}; + StackOffset, + MemAddressOffset}; Ptr = WriteBytes(header, Ptr); SerializeSegmentsToBuffer(Modules, NumSegmentBytes, Ptr); @@ -291,6 +330,9 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules, Ptr += NumMIBInfoBytes + NumHistogramBytes; SerializeStackToBuffer(StackIds, NumStackBytes, Ptr); + Ptr += NumStackBytes; + + SerializeMemAddressesToBuffer(MemBlockAddresses, NumMemAddressBytes, Ptr); return TotalSizeBytes; } diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.h b/compiler-rt/lib/memprof/memprof_rawprofile.h index e2494175f165e..7f66498a2debb 100644 --- a/compiler-rt/lib/memprof/memprof_rawprofile.h +++ b/compiler-rt/lib/memprof/memprof_rawprofile.h @@ -4,11 +4,15 @@ #include "memprof_mibmap.h" #include "sanitizer_common/sanitizer_array_ref.h" #include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_vector.h" namespace __memprof { // Serialize the in-memory representation of the memprof profile to the raw // binary format. The format itself is documented memprof_rawprofile.cpp. +// MemBlockAddresses contains the addresses of memory blocks observed during +// profiling; it is only read by the serializer.
u64 SerializeToRawProfile(MIBMapTy &BlockCache, ArrayRef<LoadedModule> Modules, + const __sanitizer::Vector<u64> &MemBlockAddresses, char *&Buffer); } // namespace __memprof diff --git a/compiler-rt/lib/memprof/memprof_rtl.cpp b/compiler-rt/lib/memprof/memprof_rtl.cpp index 4fd4b5210a7ec..ed1d497661be9 100644 --- a/compiler-rt/lib/memprof/memprof_rtl.cpp +++ b/compiler-rt/lib/memprof/memprof_rtl.cpp @@ -37,6 +37,9 @@ SANITIZER_WEAK_ATTRIBUTE char __memprof_profile_filename[1]; // Share ClHistogram compiler flag with runtime. SANITIZER_WEAK_ATTRIBUTE bool __memprof_histogram; +// Share ClFineGranularity compiler flag with runtime. +SANITIZER_WEAK_ATTRIBUTE bool __memprof_fine_granularity; + namespace __memprof { static void MemprofDie() { diff --git a/compiler-rt/lib/memprof/tests/CMakeLists.txt b/compiler-rt/lib/memprof/tests/CMakeLists.txt index 1603d47d019ed..080dfc0c3499e 100644 --- a/compiler-rt/lib/memprof/tests/CMakeLists.txt +++ b/compiler-rt/lib/memprof/tests/CMakeLists.txt @@ -42,6 +42,14 @@ if(NOT WIN32) list(APPEND MEMPROF_UNITTEST_LINK_FLAGS -pthread) endif() +if(APPLE) + list(APPEND MEMPROF_UNITTEST_CFLAGS ${DARWIN_osx_CFLAGS}) + list(APPEND MEMPROF_UNITTEST_LINK_FLAGS ${DARWIN_osx_LINK_FLAGS}) + + add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS) + list(APPEND MEMPROF_UNITTEST_LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}) +endif() + set(MEMPROF_UNITTEST_DEPS) if (TARGET cxx-headers OR HAVE_LIBCXX) list(APPEND MEMPROF_UNITTEST_DEPS cxx-headers) @@ -54,13 +62,22 @@ append_list_if(COMPILER_RT_HAS_LIBDL -ldl MEMPROF_UNITTEST_LINK_LIBRARIES) # Adds memprof tests for each architecture.
macro(add_memprof_tests_for_arch arch) - set(MEMPROF_TEST_RUNTIME_OBJECTS - $<TARGET_OBJECTS:RTSanitizerCommon.${arch}> - $<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}> - $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}> - $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}> - $<TARGET_OBJECTS:RTSanitizerCommonSymbolizerInternal.${arch}> - ) + if(APPLE) + set(MEMPROF_TEST_RUNTIME_OBJECTS + $<TARGET_OBJECTS:RTSanitizerCommon.osx> + $<TARGET_OBJECTS:RTSanitizerCommonCoverage.osx> + $<TARGET_OBJECTS:RTSanitizerCommonLibc.osx> + $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.osx> + ) + else() + set(MEMPROF_TEST_RUNTIME_OBJECTS + $<TARGET_OBJECTS:RTSanitizerCommon.${arch}> + $<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}> + $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}> + $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}> + $<TARGET_OBJECTS:RTSanitizerCommonSymbolizerInternal.${arch}> + ) + endif() set(MEMPROF_TEST_RUNTIME RTMemProfTest.${arch}) add_library(${MEMPROF_TEST_RUNTIME} STATIC ${MEMPROF_TEST_RUNTIME_OBJECTS}) set_target_properties(${MEMPROF_TEST_RUNTIME} PROPERTIES diff --git a/compiler-rt/lib/memprof/tests/rawprofile.cpp b/compiler-rt/lib/memprof/tests/rawprofile.cpp index 5764af9ce8afb..01fdcd9fb7230 100644 --- a/compiler-rt/lib/memprof/tests/rawprofile.cpp +++ b/compiler-rt/lib/memprof/tests/rawprofile.cpp @@ -9,6 +9,7 @@ #include "sanitizer_common/sanitizer_procmaps.h" #include "sanitizer_common/sanitizer_stackdepot.h" #include "sanitizer_common/sanitizer_stacktrace.h" +#include "sanitizer_common/sanitizer_vector.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -47,8 +48,9 @@ TEST(MemProf, Basic) { __sanitizer::LoadedModule FakeModule; FakeModule.addAddressRange(/*begin=*/0x10, /*end=*/0x20, /*executable=*/true, /*writable=*/false, /*name=*/""); - const char uuid[MEMPROF_BUILDID_MAX_SIZE] = {0xC, 0x0, 0xF, 0xF, 0xE, 0xE}; - FakeModule.setUuid(uuid, MEMPROF_BUILDID_MAX_SIZE); + const char uuid[__sanitizer::kModuleUUIDSize] = {0xC, 0x0, 
0xF, + 0xF, 0xE, 0xE}; + FakeModule.setUuid(uuid, __sanitizer::kModuleUUIDSize); __sanitizer::ArrayRef<__sanitizer::LoadedModule> Modules(&FakeModule, (&FakeModule) + 1); @@ -60,12 +62,18 @@ TEST(MemProf, Basic) { FakeMIB.AllocCount = 0x1; FakeMIB.TotalAccessCount = 0x2; + // Use large enough PC values to avoid underflow when + // GetPreviousInstructionPc subtracts an architecture-dependent value. uint64_t FakeIds[2]; - FakeIds[0] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/2, FakeMap); - FakeIds[1] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/3, FakeMap); + FakeIds[0] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/100, FakeMap); + FakeIds[1] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/200, FakeMap); char *Ptr = nullptr; - uint64_t NumBytes = SerializeToRawProfile(FakeMap, Modules, Ptr); + __sanitizer::Vector<__sanitizer::u64> FakeAddresses; + FakeAddresses.PushBack(0x100); + FakeAddresses.PushBack(0x200); + uint64_t NumBytes = + SerializeToRawProfile(FakeMap, Modules, FakeAddresses, Ptr); const char *Buffer = Ptr; ASSERT_GT(NumBytes, 0ULL); @@ -78,39 +86,54 @@ TEST(MemProf, Basic) { const uint64_t SegmentOffset = Read(Ptr); const uint64_t MIBOffset = Read(Ptr); const uint64_t StackOffset = Read(Ptr); + const uint64_t MemAddressOffset = Read(Ptr); // ============= Check sizes and padding. EXPECT_EQ(TotalSize, NumBytes); EXPECT_EQ(TotalSize % 8, 0ULL); - // Should be equal to the size of the raw profile header. - EXPECT_EQ(SegmentOffset, 48ULL); + // Should be equal to the size of the raw profile header (7 fields * 8 bytes). + EXPECT_EQ(SegmentOffset, 56ULL); // We expect only 1 segment entry, 8b for the count and 64b for SegmentEntry // in memprof_rawprofile.cpp. EXPECT_EQ(MIBOffset - SegmentOffset, 72ULL); - EXPECT_EQ(MIBOffset, 120ULL); + EXPECT_EQ(MIBOffset, 128ULL); // We expect 2 mib entry, 8b for the count and sizeof(uint64_t) + // sizeof(MemInfoBlock) contains stack id + MeminfoBlock.
EXPECT_EQ(StackOffset - MIBOffset, 8 + 2 * (8 + sizeof(MemInfoBlock))); - EXPECT_EQ(StackOffset, 432ULL); + EXPECT_EQ(StackOffset, 440ULL); // We expect 2 stack entries, with 5 frames - 8b for total count, // 2 * (8b for id, 8b for frame count and 5*8b for fake frames). - // Since this is the last section, there may be additional padding at the end - // to make the total profile size 8b aligned. - EXPECT_GE(TotalSize - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8)); + // Since this is no longer the last section, check the exact size. + EXPECT_EQ(MemAddressOffset - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8)); + + // We expect 2 address entries: 8b for count + 2 * 8b for addresses. + EXPECT_GE(TotalSize - MemAddressOffset, 8ULL + 2 * 8); // ============= Check contents. - unsigned char ExpectedSegmentBytes[72] = { - 0x01, 0, 0, 0, 0, 0, 0, 0, // Number of entries - 0x10, 0, 0, 0, 0, 0, 0, 0, // Start - 0x20, 0, 0, 0, 0, 0, 0, 0, // End - 0x0, 0, 0, 0, 0, 0, 0, 0, // Offset - 0x20, 0, 0, 0, 0, 0, 0, 0, // UuidSize - 0xC, 0x0, 0xF, 0xF, 0xE, 0xE // Uuid - }; + // Build expected segment bytes dynamically since uuid size varies by + // platform (kModuleUUIDSize is 16 on Apple, 32 on Linux). + unsigned char ExpectedSegmentBytes[72] = {}; + // Number of entries = 1 + ExpectedSegmentBytes[0] = 0x01; + // Start = 0x10 + ExpectedSegmentBytes[8] = 0x10; + // End = 0x20 + ExpectedSegmentBytes[16] = 0x20; + // Offset = 0x0 (base_address_ is 0 for default-constructed LoadedModule) + // BuildIdSize = kModuleUUIDSize + ExpectedSegmentBytes[32] = + static_cast<unsigned char>(__sanitizer::kModuleUUIDSize); + // Uuid + ExpectedSegmentBytes[40] = 0xC; + ExpectedSegmentBytes[41] = 0x0; + ExpectedSegmentBytes[42] = 0xF; + ExpectedSegmentBytes[43] = 0xF; + ExpectedSegmentBytes[44] = 0xE; + ExpectedSegmentBytes[45] = 0xE; EXPECT_EQ(memcmp(Buffer + SegmentOffset, ExpectedSegmentBytes, 72), 0); // Check that the number of entries is 2.
@@ -139,21 +162,19 @@ TEST(MemProf, Basic) { // Check that the 1st stack id is set. EXPECT_EQ(*reinterpret_cast<const uint64_t *>(Buffer + StackOffset + 8), FakeIds[0]); - // Contents are num pcs, value of each pc - 1. - unsigned char ExpectedStackBytes[2][6 * 8] = { - { - 0x5, 0, 0, 0, 0, 0, 0, 0, // Number of PCs - 0x1, 0, 0, 0, 0, 0, 0, 0, // PC ... - 0x2, 0, 0, 0, 0, 0, 0, 0, 0x3, 0, 0, 0, 0, 0, 0, 0, - 0x4, 0, 0, 0, 0, 0, 0, 0, 0x5, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0x5, 0, 0, 0, 0, 0, 0, 0, // Number of PCs - 0x2, 0, 0, 0, 0, 0, 0, 0, // PC ... - 0x3, 0, 0, 0, 0, 0, 0, 0, 0x4, 0, 0, 0, 0, 0, 0, 0, - 0x5, 0, 0, 0, 0, 0, 0, 0, 0x6, 0, 0, 0, 0, 0, 0, 0, - }, - }; + // Build expected stack bytes dynamically since GetPreviousInstructionPc + // applies an architecture-dependent adjustment (e.g., -1 on x86_64, -4 on + // arm64). + unsigned char ExpectedStackBytes[2][6 * 8] = {}; + for (int s = 0; s < 2; s++) { + uintptr_t StackPCBegin = (s == 0) ? 100 : 200; + // Number of PCs = 5 + ExpectedStackBytes[s][0] = 0x5; + for (int i = 0; i < 5; i++) { + uint64_t pc = StackTrace::GetPreviousInstructionPc(StackPCBegin + i); + memcpy(&ExpectedStackBytes[s][(i + 1) * 8], &pc, sizeof(pc)); + } + } EXPECT_EQ(memcmp(Buffer + StackOffset + 16, ExpectedStackBytes[0], sizeof(ExpectedStackBytes[0])), 0); diff --git a/compiler-rt/lib/memprof/weak_symbols.txt b/compiler-rt/lib/memprof/weak_symbols.txt index bfece89e2e157..d43870f8965f2 100644 --- a/compiler-rt/lib/memprof/weak_symbols.txt +++ b/compiler-rt/lib/memprof/weak_symbols.txt @@ -1 +1,3 @@ -___memprof_default_options_str ___memprof_default_options __memprof_profile_filename +___memprof_default_options_str +___memprof_default_options +__memprof_profile_filename diff --git a/compiler-rt/test/memprof/CMakeLists.txt b/compiler-rt/test/memprof/CMakeLists.txt index 4c50ae6b83719..862565de24641 100644 --- a/compiler-rt/test/memprof/CMakeLists.txt +++ b/compiler-rt/test/memprof/CMakeLists.txt @@ -4,7 +4,7 @@ set(MEMPROF_TESTSUITES) 
set(MEMPROF_DYNAMIC_TESTSUITES) macro(get_bits_for_arch arch bits) - if (${arch} MATCHES "x86_64") + if (${arch} MATCHES "x86_64|arm64|aarch64") set(${bits} 64) else() message(FATAL_ERROR "Unexpected target architecture: ${arch}") diff --git a/compiler-rt/test/memprof/TestCases/Darwin/lit.local.cfg.py b/compiler-rt/test/memprof/TestCases/Darwin/lit.local.cfg.py new file mode 100644 index 0000000000000..af82d30cf4de9 --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/Darwin/lit.local.cfg.py @@ -0,0 +1,10 @@ +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + + +root = getRoot(config) + +if root.target_os not in ["Darwin"]: + config.unsupported = True diff --git a/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone.c b/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone.c new file mode 100644 index 0000000000000..bef41e4f87d73 --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone.c @@ -0,0 +1,30 @@ +// Test basic memory profiling on Darwin via DYLD interposition of malloc/free. +// Verifies that memprof correctly intercepts allocations through the malloc +// zone mechanism. + +// RUN: %clang_memprof -O0 %s -o %t +// RUN: %env_memprof_opts=print_text=true:log_path=stderr:print_module_map=1 %run %t 2>%t.out +// RUN: FileCheck %s < %t.out +// Verify that raw addresses resolve to the expected symbol.
+// RUN: %python %S/symbolize_raw_stacks.py %t %t.out | FileCheck --check-prefix=SYM %s + +// CHECK: Memory allocation stack id = [[STACKID:[0-9]+]]{{[[:space:]].*}}alloc_count 1, size (ave/min/max) 40.00 / 40 / 40 +// CHECK-NEXT: access_count (ave/min/max): 20.00 / 20 / 20 +// CHECK: Stack for id [[STACKID]]: +// CHECK-NEXT: #0 0x{{[0-9a-f]+}} +// CHECK-NEXT: #1 0x{{[0-9a-f]+}} + +// SYM: main + +#include <stdlib.h> + +int main() { + int *p = (int *)malloc(10 * sizeof(int)); + for (int i = 0; i < 10; i++) + p[i] = i; + int j = 0; + for (int i = 0; i < 10; i++) + j += p[i]; + free(p); + return 0; +} diff --git a/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone_allocators.c b/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone_allocators.c new file mode 100644 index 0000000000000..b8f7b0ce210cd --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/Darwin/malloc_zone_allocators.c @@ -0,0 +1,41 @@ +// Test that calloc, realloc, and posix_memalign are properly intercepted +// through the Darwin malloc zone mechanism. + +// RUN: %clang_memprof -O0 %s -o %t +// RUN: %env_memprof_opts=print_text=true:log_path=stderr:print_module_map=1 %run %t 2>%t.out +// RUN: FileCheck %s < %t.out +// Verify that raw addresses resolve to the expected symbol. +// RUN: %python %S/symbolize_raw_stacks.py %t %t.out | FileCheck --check-prefix=SYM %s + +// CHECK: Memory allocation stack id +// CHECK: alloc_count + +// SYM: main + +#include <stdlib.h> +#include <string.h> + +int main() { + // Test calloc interception. + int *p = (int *)calloc(10, sizeof(int)); + for (int i = 0; i < 10; i++) + p[i] = i; + free(p); + + // Test realloc interception. + char *q = (char *)malloc(10); + memset(q, 'a', 10); + q = (char *)realloc(q, 20); + memset(q, 'b', 20); + free(q); + + // Test posix_memalign interception. 
+ void *r; + int ret = posix_memalign(&r, 64, 128); + if (ret == 0) { + memset(r, 0, 128); + free(r); + } + + return 0; +} diff --git a/compiler-rt/test/memprof/TestCases/Darwin/new_delete.cpp b/compiler-rt/test/memprof/TestCases/Darwin/new_delete.cpp new file mode 100644 index 0000000000000..db3db2a7f0366 --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/Darwin/new_delete.cpp @@ -0,0 +1,29 @@ +// Test that memprof works with operator new/delete on Darwin. +// This verifies C++ allocation interception through DYLD interposition. + +// RUN: %clangxx_memprof -O0 %s -o %t +// RUN: %env_memprof_opts=print_text=true:log_path=stderr:print_module_map=1 %run %t 2>%t.out +// RUN: FileCheck %s < %t.out +// Verify that raw addresses resolve to the expected symbol. +// RUN: %python %S/symbolize_raw_stacks.py %t %t.out | FileCheck --check-prefix=SYM %s + +// CHECK: Memory allocation stack id = [[STACKID:[0-9]+]]{{[[:space:]].*}}alloc_count 1, size (ave/min/max) 40.00 / 40 / 40 +// CHECK-NEXT: access_count (ave/min/max): 20.00 / 20 / 20 +// CHECK: Stack for id [[STACKID]]: +// CHECK-NEXT: #0 0x{{[0-9a-f]+}} +// CHECK-NEXT: #1 0x{{[0-9a-f]+}} + +// SYM: main + +#include <cstdlib> + +int main() { + int *p = new int[10]; + for (int i = 0; i < 10; i++) + p[i] = i; + int j = 0; + for (int i = 0; i < 10; i++) + j += p[i]; + delete[] p; + return 0; +} diff --git a/compiler-rt/test/memprof/TestCases/Darwin/raw_profile.cpp b/compiler-rt/test/memprof/TestCases/Darwin/raw_profile.cpp new file mode 100644 index 0000000000000..4ce8e3b8787de --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/Darwin/raw_profile.cpp @@ -0,0 +1,19 @@ +// Test that the raw binary profile is correctly generated on Darwin. +// Verify the magic number header to ensure profile serialization works. 
+ +// RUN: %clangxx_memprof %s -o %t +// RUN: %env_memprof_opts=log_path=stdout %run %t > %t.memprofraw +// RUN: od -c -N 8 %t.memprofraw | FileCheck %s + +#include <cstdlib> +#include <cstring> + +int main() { + char *x = (char *)malloc(10); + memset(x, 0, 10); + free(x); + return 0; +} + +// Check the raw profile magic number (little-endian). +// CHECK: 0000000 201 r f o r p m 377 diff --git a/compiler-rt/test/memprof/TestCases/Darwin/symbolize_raw_stacks.py b/compiler-rt/test/memprof/TestCases/Darwin/symbolize_raw_stacks.py new file mode 100644 index 0000000000000..276ac4bce4159 --- /dev/null +++ b/compiler-rt/test/memprof/TestCases/Darwin/symbolize_raw_stacks.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""Symbolize raw memprof stack addresses using llvm-symbolizer. + +Usage: symbolize_raw_stacks.py <binary> <memprof_output_file> + +Parses the module map from memprof output (requires print_module_map=1) to +determine the binary's ASLR slide, converts raw runtime addresses to +file-relative addresses, and symbolizes them using llvm-symbolizer (expected +to be on PATH). Prints one symbolized function name per line for addresses +that fall within the main binary. +""" + +import re +import subprocess +import sys + + +def main(): + binary = sys.argv[1] + output_file = sys.argv[2] + + with open(output_file) as f: + data = f.read() + + # Parse module map to find the binary's load range. + # Format: 0xSTART-0xEND /path/to/binary (arch) <UUID> + binary_name = binary.rsplit("/", 1)[-1] + slide = None + end_addr = None + for line in data.split("\n"): + if binary_name in line and line.startswith("0x"): + addr_range = line.split()[0] + start_str, end_str = addr_range.split("-") + slide = int(start_str, 16) + end_addr = int(end_str, 16) + break + + if slide is None: + print("ERROR: Could not find binary in module map", file=sys.stderr) + sys.exit(1) + + # Collect unique addresses within the binary's memory range. 
+ addrs = set() + for match in re.finditer(r"#\d+ (0x[0-9a-f]+)", data): + runtime_addr = int(match.group(1), 16) + if slide <= runtime_addr <= end_addr: + addrs.add(runtime_addr) + + # Symbolize using llvm-symbolizer with file-relative addresses. + for runtime_addr in sorted(addrs): + file_addr = runtime_addr - slide + result = subprocess.run( + ["llvm-symbolizer", "--obj=" + binary, hex(file_addr)], + capture_output=True, + text=True, + ) + func_name = result.stdout.strip().split("\n")[0] + if func_name and func_name != "??": + print(func_name) + + +if __name__ == "__main__": + main() diff --git a/compiler-rt/test/memprof/TestCases/free_sized.cpp b/compiler-rt/test/memprof/TestCases/free_sized.cpp index c8ce8464ef895..66c14735b051e 100644 --- a/compiler-rt/test/memprof/TestCases/free_sized.cpp +++ b/compiler-rt/test/memprof/TestCases/free_sized.cpp @@ -1,3 +1,4 @@ +// UNSUPPORTED: darwin // RUN: %clangxx_memprof %s -o %t // RUN: %env_memprof_opts=print_text=true:log_path=stdout %run %t | FileCheck %s diff --git a/compiler-rt/test/memprof/TestCases/stress_dtls.c b/compiler-rt/test/memprof/TestCases/stress_dtls.c index 8a0d671fb241a..f1a59eb868d12 100644 --- a/compiler-rt/test/memprof/TestCases/stress_dtls.c +++ b/compiler-rt/test/memprof/TestCases/stress_dtls.c @@ -1,4 +1,5 @@ // REQUIRES: memprof-64-bits +// UNSUPPORTED: darwin // Stress test dynamic TLS + dlopen + threads. // // RUN: %clang_memprof -x c -DSO_NAME=f0 %s -shared -o %t-f0.so -fPIC diff --git a/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in b/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in index 1e2442a1487a4..66a7d71ba1684 100644 --- a/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in +++ b/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in @@ -11,7 +11,6 @@ lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/unittests/lit.common.un # Setup config name. 
config.name = 'MemProfiler-Unit' config.target_arch = "@arch@" -assert config.target_arch == 'x86_64' config.test_exec_root = os.path.join("@COMPILER_RT_BINARY_DIR@", "lib", "memprof", "tests") @@ -21,9 +20,7 @@ config.test_source_root = config.test_exec_root # When LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=on, the initial value of # config.compiler_rt_libdir (COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR) has the # host triple as the trailing path component. The value is incorrect for i386 -# tests on x86_64 hosts and vice versa. But, since only x86_64 is enabled as -# target, and we don't support different environments for building and, -# respectively, running tests, we we only need to fix up the x86_64 case. +# tests on x86_64 hosts and vice versa. Only the x86_64 target is fixed up below. if config.enable_per_target_runtime_dir and config.target_arch != config.host_arch: config.compiler_rt_libdir = re.sub(r'/i386(?=-[^/]+$)', '/x86_64', config.compiler_rt_libdir) diff --git a/compiler-rt/test/memprof/lit.cfg.py b/compiler-rt/test/memprof/lit.cfg.py index e28507be4dc9e..4f4f0a39f279a 100644 --- a/compiler-rt/test/memprof/lit.cfg.py +++ b/compiler-rt/test/memprof/lit.cfg.py @@ -35,7 +35,9 @@ def get_required_attr(config, attr_name): # Setup source root. config.test_source_root = os.path.dirname(__file__) -libdl_flag = "-ldl" +libdl_flag = "" +if config.target_os == "Linux": + libdl_flag = "-ldl" # Setup default compiler flags used with -fmemory-profile option. # FIXME: Review the set of required flags and check if it can be reduced.
@@ -74,10 +76,16 @@ def build_invocation(compile_flags): ("%clangxx_memprof ", build_invocation(clang_memprof_cxxflags)) ) if config.memprof_dynamic: - shared_libmemprof_path = os.path.join( - config.compiler_rt_libdir, - "libclang_rt.memprof{}.so".format(config.target_suffix), - ) + if config.target_os == "Darwin": + shared_libmemprof_path = os.path.join( + config.compiler_rt_libdir, + "libclang_rt.memprof_osx_dynamic.dylib", + ) + else: + shared_libmemprof_path = os.path.join( + config.compiler_rt_libdir, + "libclang_rt.memprof{}.so".format(config.target_suffix), + ) config.substitutions.append(("%shared_libmemprof", shared_libmemprof_path)) config.substitutions.append( ("%clang_memprof_static ", build_invocation(clang_memprof_static_cflags)) @@ -92,11 +100,17 @@ def build_invocation(compile_flags): config.available_features.add("fast-unwinder-works") -# Set LD_LIBRARY_PATH to pick dynamic runtime up properly. -new_ld_library_path = os.path.pathsep.join( - (config.compiler_rt_libdir, config.environment.get("LD_LIBRARY_PATH", "")) -) -config.environment["LD_LIBRARY_PATH"] = new_ld_library_path +# Set library path to pick dynamic runtime up properly. +if config.target_os == "Darwin": + new_dyld_library_path = os.path.pathsep.join( + (config.compiler_rt_libdir, config.environment.get("DYLD_LIBRARY_PATH", "")) + ) + config.environment["DYLD_LIBRARY_PATH"] = new_dyld_library_path +else: + new_ld_library_path = os.path.pathsep.join( + (config.compiler_rt_libdir, config.environment.get("LD_LIBRARY_PATH", "")) + ) + config.environment["LD_LIBRARY_PATH"] = new_ld_library_path # Default test suffixes. config.suffixes = [".c", ".cpp"] @@ -106,7 +120,7 @@ def build_invocation(compile_flags): config.substitutions.append(("%pie", "-pie")) # Only run the tests on supported OSs. 
-if config.target_os not in ["Linux"]: +if config.target_os not in ["Linux", "Darwin"]: config.unsupported = True if not config.parallelism_group: diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc index 26badddae6f3a..5be4c0cc2802a 100644 --- a/llvm/include/llvm/ProfileData/MemProfData.inc +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -33,10 +33,10 @@ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) // The version number of the raw binary format. -#define MEMPROF_RAW_VERSION 5ULL +#define MEMPROF_RAW_VERSION 6ULL // Currently supported versions. -#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL} +#define MEMPROF_RAW_SUPPORTED_VERSIONS {3ULL, 4ULL, 5ULL, 6ULL} #define MEMPROF_V3_MIB_SIZE 132ULL; @@ -52,6 +52,8 @@ PACKED(struct Header { uint64_t SegmentOffset; uint64_t MIBOffset; uint64_t StackOffset; + // Added in V6: offset to memory block address section. + uint64_t MemAddressOffset; }); // A struct describing the information necessary to describe a /proc/maps diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h index 8fdae7a472d5f..de86af824c306 100644 --- a/llvm/include/llvm/ProfileData/MemProfReader.h +++ b/llvm/include/llvm/ProfileData/MemProfReader.h @@ -156,6 +156,12 @@ class LLVM_ABI RawMemProfReader final : public MemProfReader { report_fatal_error(std::move(E)); } + // Returns memory block addresses recorded during profiling (V6+). + // TODO: Used by llvm-profdata to map addresses back to symbols. 
+ const llvm::SmallVector<uint64_t> &getMemBlockAddresses() const { + return MemBlockAddresses; + } + private: RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName) : Binary(std::move(Bin)), KeepSymbolName(KeepName) {} @@ -202,6 +208,9 @@ class LLVM_ABI RawMemProfReader final : public MemProfReader { llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData; CallStackMap StackMap; + // Memory block addresses recorded during profiling (V6+). + llvm::SmallVector<uint64_t> MemBlockAddresses; + // Cached symbolization from PC to Frame. llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame; diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index 3fc0dbfd8e69d..9188b5698aea8 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -213,6 +213,26 @@ readMemInfoBlocksV5(const char *Ptr) { return readMemInfoBlocksCommon(Ptr, /*IsHistogramEncoded=*/true); } +// V6 uses the same MIB format as V5. +llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> +readMemInfoBlocksV6(const char *Ptr) { + return readMemInfoBlocksCommon(Ptr, /*IsHistogramEncoded=*/true); +} + +llvm::SmallVector<uint64_t> readMemBlockAddresses(const char *Ptr) { + using namespace support; + + const uint64_t NumEntries = + endian::readNext<uint64_t, llvm::endianness::little>(Ptr); + llvm::SmallVector<uint64_t> Addresses; + Addresses.reserve(NumEntries); + for (uint64_t I = 0; I < NumEntries; I++) { + Addresses.push_back( + endian::readNext<uint64_t, llvm::endianness::little>(Ptr)); + } + return Addresses; +} + CallStackMap readStackInfo(const char *Ptr) { using namespace support; @@ -261,6 +281,7 @@ bool isRuntimePath(const StringRef Path) { // This list should be updated in case new files with additional interceptors // are added to the memprof runtime. 
return Filename == "memprof_malloc_linux.cpp" || + Filename == "memprof_malloc_mac.cpp" || Filename == "memprof_interceptors.cpp" || Filename == "memprof_new_delete.cpp"; } @@ -707,6 +728,8 @@ RawMemProfReader::readMemInfoBlocks(const char *Ptr) { return readMemInfoBlocksV4(Ptr); if (MemprofRawVersion == 5ULL) return readMemInfoBlocksV5(Ptr); + if (MemprofRawVersion == 6ULL) + return readMemInfoBlocksV6(Ptr); llvm_unreachable( "Panic: Unsupported version number when reading MemInfoBlocks"); } @@ -773,6 +796,14 @@ Error RawMemProfReader::readRawProfile( "memprof raw profile got different call stack for same id"); } + // Read in memory block addresses for V6+. + if (MemprofRawVersion >= 6ULL) { + for (const auto &Addr : + readMemBlockAddresses(Next + Header->MemAddressOffset)) { + MemBlockAddresses.push_back(Addr); + } + } + Next += Header->TotalSize; } diff --git a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp index 05616d81dbe5f..1da8a4b73f26a 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp @@ -71,6 +71,8 @@ constexpr char MemProfFilenameVar[] = "__memprof_profile_filename"; constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram"; +constexpr char MemProfFineGranularityFlagVar[] = "__memprof_fine_granularity"; + // Command-line flags. 
static cl::opt<bool> ClInsertVersionCheck( @@ -138,6 +140,12 @@ static cl::opt<bool> ClHistogram("memprof-histogram", cl::desc("Collect access count histograms"), cl::Hidden, cl::init(false)); +static cl::opt<bool> ClFineGranularity( + "memprof-fine-granularity", + cl::desc("Use fine shadow granularity (8 bytes) without collecting " + "histograms"), + cl::Hidden, cl::init(false)); + static cl::opt<std::string> MemprofRuntimeDefaultOptions("memprof-runtime-default-options", cl::desc("The default memprof options"), @@ -156,7 +164,8 @@ namespace { struct ShadowMapping { ShadowMapping() { Scale = ClMappingScale; - Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity; + Granularity = (ClHistogram || ClFineGranularity) ? HistogramGranularity + : ClMappingGranularity; Mask = ~(Granularity - 1); } @@ -240,8 +249,15 @@ MemProfilerPass::MemProfilerPass() = default; PreservedAnalyses MemProfilerPass::run(Function &F, AnalysisManager<Function> &AM) { - assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) && - "Memprof with histogram only supports default mapping granularity"); + if (ClHistogram && ClMappingGranularity != DefaultMemGranularity) + report_fatal_error( + "Memprof with histogram only supports default mapping granularity"); + if (ClFineGranularity && ClMappingGranularity != DefaultMemGranularity) + report_fatal_error("Memprof with fine granularity only supports default " + "mapping granularity"); + if (ClHistogram && ClFineGranularity) + report_fatal_error( + "Cannot use both -memprof-histogram and -memprof-fine-granularity"); Module &M = *F.getParent(); MemProfiler Profiler(M); if (Profiler.instrumentFunction(F)) @@ -451,14 +467,16 @@ void MemProfiler::instrumentAddress(Instruction *OrigIns, return; } - Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C); + Type *ShadowTy = (ClHistogram || ClFineGranularity) ? 
Type::getInt8Ty(*C) + : Type::getInt64Ty(*C); Type *ShadowPtrTy = PointerType::get(*C, 0); Value *ShadowPtr = memToShadow(AddrLong, IRB); Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy); Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr); - // If we are profiling with histograms, add overflow protection at 255. - if (ClHistogram) { + // If we are using fine granularity shadow (histogram or fine-granularity + // mode), add overflow protection at 255. + if (ClHistogram || ClFineGranularity) { Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255); Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount); Instruction *IncBlock = @@ -506,6 +524,23 @@ void createMemprofHistogramFlagVar(Module &M) { appendToCompilerUsed(M, MemprofHistogramFlag); } +// Set MemprofFineGranularityFlag as a Global variable in IR. This tells the +// runtime to use fine (8-byte) shadow granularity without collecting +// histograms. +void createMemprofFineGranularityFlagVar(Module &M) { + const StringRef VarName(MemProfFineGranularityFlagVar); + Type *IntTy1 = Type::getInt1Ty(M.getContext()); + auto MemprofFineGranularityFlag = new GlobalVariable( + M, IntTy1, true, GlobalValue::WeakAnyLinkage, + Constant::getIntegerValue(IntTy1, APInt(1, ClFineGranularity)), VarName); + const Triple &TT = M.getTargetTriple(); + if (TT.supportsCOMDAT()) { + MemprofFineGranularityFlag->setLinkage(GlobalValue::ExternalLinkage); + MemprofFineGranularityFlag->setComdat(M.getOrInsertComdat(VarName)); + } + appendToCompilerUsed(M, MemprofFineGranularityFlag); +} + void createMemprofDefaultOptionsVar(Module &M) { Constant *OptionsConst = ConstantDataArray::getString( M.getContext(), MemprofRuntimeDefaultOptions, /*AddNull=*/true); @@ -539,6 +574,8 @@ bool ModuleMemProfiler::instrumentModule(Module &M) { createMemprofHistogramFlagVar(M); + createMemprofFineGranularityFlagVar(M); + createMemprofDefaultOptionsVar(M); return true; @@ -549,7 +586,8 @@ void MemProfiler::initializeCallbacks(Module 
&M) { for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { const std::string TypeStr = AccessIsWrite ? "store" : "load"; - const std::string HistPrefix = ClHistogram ? "hist_" : ""; + const std::string HistPrefix = + (ClHistogram || ClFineGranularity) ? "hist_" : ""; SmallVector<Type *, 2> Args1{1, IntptrTy}; MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction( _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
