https://github.com/apaznikov updated https://github.com/llvm/llvm-project/pull/169897
>From e78c6ad7a069d9415c9993d30b3294ce4965ec0d Mon Sep 17 00:00:00 2001 From: Alexey Paznikov <[email protected]> Date: Mon, 15 Sep 2025 16:34:10 +0800 Subject: [PATCH 1/2] [TSan] Add dominance-based redundant instrumentation elimination This patch introduces a new optimization pass to ThreadSanitizer that eliminates redundant instrumentation of memory accesses using dominance and post-dominance analysis. The optimization relies on the observation that if two memory accesses to the same location are executed on a path without intermediate synchronization (e.g., atomic operations, fences, or unknown function calls), checking one of them is often sufficient to detect a data race. The algorithm works in two phases: 1. Dominance-based elimination: If access A dominates access B, they target the same memory location (MustAlias), and the path from A to B is free of synchronization, the instrumentation for B is redundant. 2. Post-dominance-based elimination: If access B post-dominates access A, they target the same location, and the path from A to B is safe, the instrumentation for A can be removed (since execution is guaranteed to reach B, where the race will be detected). To ensure soundness and prevent false negatives: - We use strict MustAlias results from AliasAnalysis. - We perform a safety analysis to ensure no "dangerous" instructions (synchronization, unknown calls) exist between the accesses. - For post-dominance, we conservatively avoid elimination across loops to prevent issues with non-terminating loops where the post-dominating access might never be reached. This implementation is intra-procedural and utilizes standard LLVM analyses (DominatorTree, PostDominatorTree, AAResults, LoopInfo). This optimization is disabled by default and can be enabled via `-tsan-use-dominance-analysis`. This implementation is based on the research algorithm designed by: Alexey Paznikov, Andrey Kogutenko, Yaroslav Osipov, Michael Schwarz, and Umang Mathur. 
--- compiler-rt/test/tsan/CMakeLists.txt | 115 ++- compiler-rt/test/tsan/lit.cfg.py | 10 + compiler-rt/test/tsan/lit.site.cfg.py.in | 3 + .../Instrumentation/ThreadSanitizer.cpp | 742 ++++++++++++++++-- .../ThreadSanitizer/dominance-elimination.ll | 540 +++++++++++++ 5 files changed, 1300 insertions(+), 110 deletions(-) create mode 100644 llvm/test/Instrumentation/ThreadSanitizer/dominance-elimination.ll diff --git a/compiler-rt/test/tsan/CMakeLists.txt b/compiler-rt/test/tsan/CMakeLists.txt index 4cd88507a2baf..7cf0d19efb039 100644 --- a/compiler-rt/test/tsan/CMakeLists.txt +++ b/compiler-rt/test/tsan/CMakeLists.txt @@ -18,6 +18,7 @@ endif() set(TSAN_DYNAMIC_TEST_DEPS ${TSAN_TEST_DEPS}) set(TSAN_TESTSUITES) set(TSAN_DYNAMIC_TESTSUITES) +set(TSAN_ENABLE_DOMINANCE_ANALYSIS "False") # Disable dominance analysis by default if (NOT DEFINED TSAN_TEST_DEFLAKE_THRESHOLD) set(TSAN_TEST_DEFLAKE_THRESHOLD "10") @@ -28,47 +29,79 @@ if(APPLE) darwin_filter_host_archs(TSAN_SUPPORTED_ARCH TSAN_TEST_ARCH) endif() -foreach(arch ${TSAN_TEST_ARCH}) - set(TSAN_TEST_APPLE_PLATFORM "osx") - set(TSAN_TEST_MIN_DEPLOYMENT_TARGET_FLAG "${DARWIN_osx_MIN_VER_FLAG}") - set(TSAN_TEST_APPLE_TARGET_IS_HOST ON) - pythonize_bool(TSAN_TEST_APPLE_TARGET_IS_HOST) +# Unified function for generating TSAN test suites by architectures. 
+# Arguments: +# OUT_LIST_VAR - name of output list (for example, TSAN_TESTSUITES or TSAN_DOM_TESTSUITES) +# SUFFIX_KIND - string added to config suffix after "-${arch}" (for example, "" or "-dominance") +# CONFIG_KIND - string added to config name after "Config" (for example, "" or "Dominance") +# ENABLE_DOM - "True"/"False" enable dominance analysis +function(tsan_generate_arch_suites OUT_LIST_VAR SUFFIX_KIND CONFIG_KIND ENABLE_DOM) + foreach(arch ${TSAN_TEST_ARCH}) + set(TSAN_ENABLE_DOMINANCE_ANALYSIS "${ENABLE_DOM}") - set(TSAN_TEST_TARGET_ARCH ${arch}) - string(TOLOWER "-${arch}" TSAN_TEST_CONFIG_SUFFIX) - get_test_cc_for_arch(${arch} TSAN_TEST_TARGET_CC TSAN_TEST_TARGET_CFLAGS) + set(TSAN_TEST_APPLE_PLATFORM "osx") + set(TSAN_TEST_MIN_DEPLOYMENT_TARGET_FLAG "${DARWIN_osx_MIN_VER_FLAG}") + set(TSAN_TEST_APPLE_TARGET_IS_HOST ON) + pythonize_bool(TSAN_TEST_APPLE_TARGET_IS_HOST) - string(REPLACE ";" " " LIBDISPATCH_CFLAGS_STRING " ${COMPILER_RT_TEST_LIBDISPATCH_CFLAGS}") - string(APPEND TSAN_TEST_TARGET_CFLAGS ${LIBDISPATCH_CFLAGS_STRING}) + set(TSAN_TEST_TARGET_ARCH ${arch}) + string(TOLOWER "-${arch}${SUFFIX_KIND}" TSAN_TEST_CONFIG_SUFFIX) + get_test_cc_for_arch(${arch} TSAN_TEST_TARGET_CC TSAN_TEST_TARGET_CFLAGS) - if (COMPILER_RT_HAS_MSSE4_2_FLAG) - string(APPEND TSAN_TEST_TARGET_CFLAGS " -msse4.2 ") - endif() + string(REPLACE ";" " " LIBDISPATCH_CFLAGS_STRING " ${COMPILER_RT_TEST_LIBDISPATCH_CFLAGS}") + string(APPEND TSAN_TEST_TARGET_CFLAGS ${LIBDISPATCH_CFLAGS_STRING}) - string(TOUPPER ${arch} ARCH_UPPER_CASE) - set(CONFIG_NAME ${ARCH_UPPER_CASE}Config) + if (COMPILER_RT_HAS_MSSE4_2_FLAG) + string(APPEND TSAN_TEST_TARGET_CFLAGS " -msse4.2 ") + endif() - configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py - ) - list(APPEND TSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + string(TOUPPER ${arch} ARCH_UPPER_CASE) 
+ set(CONFIG_NAME ${ARCH_UPPER_CASE}Config${CONFIG_KIND}) - if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) - string(TOLOWER "-${arch}-${OS_NAME}-dynamic" TSAN_TEST_CONFIG_SUFFIX) - set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig) configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py + ) + list(APPEND ${OUT_LIST_VAR} ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + + if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) + # Dynamic runtime for corresponding variant + if("${SUFFIX_KIND}" STREQUAL "") + string(TOLOWER "-${arch}-${OS_NAME}-dynamic" TSAN_TEST_CONFIG_SUFFIX) + set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig${CONFIG_KIND}) + list(APPEND TSAN_DYNAMIC_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + else() + string(TOLOWER "-${arch}-${OS_NAME}-dynamic${SUFFIX_KIND}" TSAN_TEST_CONFIG_SUFFIX) + set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig${CONFIG_KIND}) + # Track dynamic dominance-analysis suites separately for a dedicated target. 
+ list(APPEND TSAN_DOM_DYNAMIC_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + endif() + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py ) - list(APPEND TSAN_DYNAMIC_TESTSUITES - ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + list(APPEND ${OUT_LIST_VAR} ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + endif() + endforeach() + + # Propagate the assembled list to the parent scope + set(${OUT_LIST_VAR} "${${OUT_LIST_VAR}}" PARENT_SCOPE) + if(DEFINED TSAN_DOM_DYNAMIC_TESTSUITES) + set(TSAN_DOM_DYNAMIC_TESTSUITES "${TSAN_DOM_DYNAMIC_TESTSUITES}" PARENT_SCOPE) endif() -endforeach() +endfunction() + +# Default configuration +set(TSAN_TESTSUITES) +tsan_generate_arch_suites(TSAN_TESTSUITES "" "" "False") + +# Enable dominance analysis (check-tsan-dominance-analysis target) +set(TSAN_DOM_TESTSUITES) +tsan_generate_arch_suites(TSAN_DOM_TESTSUITES "-dominance" "Dominance" "True") # iOS and iOS simulator test suites # These are not added into "check-all", in order to run these tests, use @@ -128,6 +161,10 @@ list(APPEND TSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit) if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) list(APPEND TSAN_DYNAMIC_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit/dynamic) endif() +list(APPEND TSAN_DOM_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit) +if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) + list(APPEND TSAN_DOM_DYNAMIC_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit/dynamic) +endif() add_lit_testsuite(check-tsan "Running ThreadSanitizer tests" ${TSAN_TESTSUITES} @@ -140,3 +177,17 @@ if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) EXCLUDE_FROM_CHECK_ALL DEPENDS ${TSAN_DYNAMIC_TEST_DEPS}) endif() + +add_lit_testsuite(check-tsan-dominance-analysis "Running ThreadSanitizer tests (dominance analysis)" + ${TSAN_DOM_TESTSUITES} + DEPENDS ${TSAN_TEST_DEPS}) +set_target_properties(check-tsan-dominance-analysis PROPERTIES FOLDER "Compiler-RT 
Tests") + +# New target: dynamic + dominance analysis +if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) + add_lit_testsuite(check-tsan-dominance-analysis-dynamic "Running ThreadSanitizer tests (dynamic, dominance analysis)" + ${TSAN_DOM_DYNAMIC_TESTSUITES} + EXCLUDE_FROM_CHECK_ALL + DEPENDS ${TSAN_DYNAMIC_TEST_DEPS}) + set_target_properties(check-tsan-dominance-analysis-dynamic PROPERTIES FOLDER "Compiler-RT Tests") +endif() diff --git a/compiler-rt/test/tsan/lit.cfg.py b/compiler-rt/test/tsan/lit.cfg.py index 8803a7bda9aa5..0e7dd5da36c23 100644 --- a/compiler-rt/test/tsan/lit.cfg.py +++ b/compiler-rt/test/tsan/lit.cfg.py @@ -56,6 +56,16 @@ def get_required_attr(config, attr_name): + extra_cflags + ["-I%s" % tsan_incdir] ) + +# Setup dominance-based elimination if enabled +tsan_enable_dominance = ( + getattr(config, "tsan_enable_dominance_analysis", "False") == "True" +) +if tsan_enable_dominance: + config.name += " (dominance-analysis)" + dom_flags = ["-mllvm", "-tsan-use-dominance-analysis"] + clang_tsan_cflags += dom_flags + clang_tsan_cxxflags = ( config.cxx_mode_flags + clang_tsan_cflags + ["-std=c++11"] + ["-I%s" % tsan_incdir] ) diff --git a/compiler-rt/test/tsan/lit.site.cfg.py.in b/compiler-rt/test/tsan/lit.site.cfg.py.in index 458da18a17147..a7e48854c22bb 100644 --- a/compiler-rt/test/tsan/lit.site.cfg.py.in +++ b/compiler-rt/test/tsan/lit.site.cfg.py.in @@ -10,6 +10,9 @@ config.target_cflags = "@TSAN_TEST_TARGET_CFLAGS@" config.target_arch = "@TSAN_TEST_TARGET_ARCH@" config.deflake_threshold = "@TSAN_TEST_DEFLAKE_THRESHOLD@" +# Enable dominance analysis. +config.tsan_enable_dominance_analysis = "@TSAN_ENABLE_DOMINANCE_ANALYSIS@" + # Load common config for all compiler-rt lit tests. 
lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured") diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 811911644106b..d787a0378639b 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -24,10 +24,14 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -84,6 +88,15 @@ static cl::opt<bool> ClOmitNonCaptured("tsan-omit-by-pointer-capturing", cl::init(true), cl::desc("Omit accesses due to pointer capturing"), cl::Hidden); +static cl::opt<bool> + ClUseDominanceAnalysis("tsan-use-dominance-analysis", cl::init(false), + cl::desc("Eliminate duplicating instructions which " + "(post)dominate given instruction"), + cl::Hidden); +static cl::opt<bool> ClPostDomAggressive( + "tsan-postdom-aggressive", cl::init(false), + cl::desc("Allow post-dominance elimination across loops (unsafe)"), + cl::Hidden); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); @@ -96,11 +109,171 @@ STATISTIC(NumOmittedReadsFromConstantGlobals, "Number of reads from constant globals"); STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads"); STATISTIC(NumOmittedNonCaptured, "Number of accesses ignored due to capturing"); +STATISTIC(NumOmittedByDominance, "Number of accesses ignored due to dominance"); +STATISTIC(NumOmittedByPostDominance, + "Number of accesses ignored 
due to post-dominance"); const char kTsanModuleCtorName[] = "tsan.module_ctor"; const char kTsanInitName[] = "__tsan_init"; namespace { +// Internal Instruction wrapper that contains more information about the +// Instruction from prior analysis. +struct InstructionInfo { + // Instrumentation emitted for this instruction is for a compounded set of + // read and write operations in the same basic block. + static constexpr unsigned kCompoundRW = (1U << 0); + + explicit InstructionInfo(Instruction *Inst) : Inst(Inst) {} + + bool isWriteOperation() const { + return isa<StoreInst>(Inst) || (Flags & kCompoundRW); + } + + Instruction *Inst; + unsigned Flags = 0; +}; + +/// A helper class to encapsulate the logic for eliminating redundant +/// instrumentation based on dominance analysis. +/// +/// This class takes a list of all accesses instructions that are candidates +/// for instrumentation. It prunes instructions that are (post-)dominated by +/// another access to the same memory location, provided that the path between +/// them is "clear" of any dangerous instructions (like function calls or +/// synchronization primitives). +class DominanceBasedElimination { +public: + /// \param AllInstr The vector of instructions to analyze. This vector is + /// modified in-place. + /// \param DT The Dominator Tree for the current function. + /// \param PDT The Post-Dominator Tree for the current function. + /// \param AA The results of Alias Analysis. 
+ DominanceBasedElimination(SmallVectorImpl<InstructionInfo> &AllInstr, + DominatorTree &DT, PostDominatorTree &PDT, + AAResults &AA, LoopInfo &LI) + : AllInstr(AllInstr), DT(DT), PDT(PDT), AA(AA), LI(LI) { + // Build per-function basic-block safety cache once + if (!AllInstr.empty() && AllInstr.front().Inst) { + Function *F = AllInstr.front().Inst->getFunction(); + BSC.ReachableToEnd.reserve(F->size()); + BSC.ConeSafeCache.reserve(F->size()); + buildBlockSafetyCache(*F); + } + } + + /// Runs the analysis and prunes redundant instructions. + /// It sequentially applies elimination based on dominance and post-dominance. + void run() { + eliminate</*IsPostDom=*/false>(); // Dominance-based elimination + eliminate</*IsPostDom=*/true>(); // Post-dominance-based elimination + } + +private: + /// Per-function precomputation cache: instruction indices within BB and + /// positions of "dangerous" instructions. + struct BlockSafetyCache { + DenseMap<const Instruction *, unsigned> IndexInBB; + + DenseMap<const BasicBlock *, SmallVector<unsigned, 4>> DangerIdxInBB; + DenseMap<const BasicBlock *, bool> HasDangerInBB; + + DenseMap<const BasicBlock *, SmallVector<unsigned, 4>> DangerIdxInBBPostDom; + DenseMap<const BasicBlock *, bool> HasDangerInBBPostDom; + + // Reachability cache: a set of blocks that can reach EndBB. + DenseMap<const BasicBlock *, SmallPtrSet<const BasicBlock *, 32>> + ReachableToEnd; + // Cone safety cache: StartBB -> (EndBB -> pathIsSafe): to avoid custom hash + DenseMap<const BasicBlock *, + DenseMap<const BasicBlock *, std::pair<bool, bool>>> + ConeSafeCache; + } BSC; + + // Reusable worklists/visited sets to amortize allocations. + SmallVector<const BasicBlock *, 32> Worklist; + SmallPtrSet<const BasicBlock *, 32> CanReachSet; + + void buildBlockSafetyCache(Function &F); + + /// Check that suffix (after FromIdx) in BB contains no unsafe instruction. 
+ bool suffixSafe(const BasicBlock *BB, unsigned FromIdx, + const DenseMap<const BasicBlock *, SmallVector<unsigned, 4>> + &DangerIdxInBB) const; + + /// Check that prefix (before ToIdx) in BB contains no unsafe instruction. + bool prefixSafe(const BasicBlock *BB, unsigned ToIdx, + const DenseMap<const BasicBlock *, SmallVector<unsigned, 4>> + &DangerIdxInBB) const; + + /// Check that (FromIdx, ToExclusiveIdx) interval inside a single BB is safe. + bool intervalSafeSameBB( + const BasicBlock *BB, unsigned FromIdx, unsigned ToExclusiveIdx, + const DenseMap<const BasicBlock *, SmallVector<unsigned, 4>> + &DangerIdxInBB) const; + + /// Checks if an instruction is "dangerous" from TSan's perspective. + /// Dangerous instructions include function calls, atomics, and fences. + /// + /// \param Inst The instruction to check. + /// \return true if the instruction is dangerous. + static bool isInstrSafe(const Instruction *Inst); + + /// For post-dominance, need to check whether the path contains loops, + /// irregular exits or unsafe calls. + static bool isInstrSafeForPostDom(const Instruction *I); + + /// Find BBs which can reach EndBB + SmallPtrSet<const BasicBlock *, 32> buildCanReachEnd(const BasicBlock *EndBB); + + /// Forward traversal from StartBB, restricted to the cone that reach EndBB. + /// In post-dom mode additionally rejects paths that go through any loop BB. + std::pair<bool, bool> traverseReachableAndCheckSafety( + const BasicBlock *StartBB, const BasicBlock *EndBB, + const SmallPtrSetImpl<const BasicBlock *> &CanReachEnd); + + /// Checks if the path between two instructions is "clear", i.e., it does not + /// contain any dangerous instructions that could alter the thread + /// synchronization state. + /// \param StartInst The starting instruction (dominates for Dom, is dominated + /// for PostDom). + /// \param EndInst The ending instruction (is dominated for Dom, + /// post-dominates for PostDom). 
+ /// \param DTBase DominatorTree (for Dom) or PostDominatorTree (for PostDom). + /// \return true if the path is clear. + template <bool IsPostDom> + bool isPathClear(Instruction *StartInst, Instruction *EndInst, + const DominatorTreeBase<BasicBlock, IsPostDom> *DTBase); + + /// A helper function to create a map from Instruction* to its index + /// in the AllInstr vector for fast lookups. + DenseMap<Instruction *, size_t> createInstrToIndexMap() const; + + /// Attempts to find a dominating instruction that can eliminate the need to + /// instrument instruction i + /// \param DTBase The dominator (post-dominator) tree being used + /// \param InstrToIndexMap Maps instructions to their indices in the AllInstr + /// \param ToRemove Vector tracking which instructions can be eliminated + /// \returns true if a dominating instruction was found that eliminates i + template <bool IsPostDom> + bool findAndMarkDominatingInstr( + size_t i, const DominatorTreeBase<BasicBlock, IsPostDom> *DTBase, + const DenseMap<Instruction *, size_t> &InstrToIndexMap, + SmallVectorImpl<bool> &ToRemove); + + /// The core elimination logic. Templated to work with both Dominators + /// and Post-Dominators. + template <bool IsPostDom> void eliminate(); + + /// A reference to the vector of instructions that we modify. + SmallVectorImpl<InstructionInfo> &AllInstr; + + /// References to the required analysis results. + DominatorTree &DT; + PostDominatorTree &PDT; + AAResults &AA; + LoopInfo &LI; +}; /// ThreadSanitizer: instrument the code in module to find races. /// @@ -109,7 +282,9 @@ namespace { /// ensures the __tsan_init function is in the list of global constructors for /// the module. struct ThreadSanitizer { - ThreadSanitizer() { + ThreadSanitizer(const TargetLibraryInfo &TLI, DominatorTree *DT, + PostDominatorTree *PDT, AAResults *AA, LoopInfo *LI) + : TLI(TLI), DT(DT), PDT(PDT), AA(AA), LI(LI) { // Check options and warn user. 
if (ClInstrumentReadBeforeWrite && ClCompoundReadBeforeWrite) { errs() @@ -118,22 +293,9 @@ struct ThreadSanitizer { } } - bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI); + bool sanitizeFunction(Function &F); private: - // Internal Instruction wrapper that contains more information about the - // Instruction from prior analysis. - struct InstructionInfo { - // Instrumentation emitted for this instruction is for a compounded set of - // read and write operations in the same basic block. - static constexpr unsigned kCompoundRW = (1U << 0); - - explicit InstructionInfo(Instruction *Inst) : Inst(Inst) {} - - Instruction *Inst; - unsigned Flags = 0; - }; - void initialize(Module &M, const TargetLibraryInfo &TLI); bool instrumentLoadOrStore(const InstructionInfo &II, const DataLayout &DL); bool instrumentAtomic(Instruction *I, const DataLayout &DL); @@ -145,6 +307,12 @@ struct ThreadSanitizer { int getMemoryAccessFuncIndex(Type *OrigTy, Value *Addr, const DataLayout &DL); void InsertRuntimeIgnores(Function &F); + const TargetLibraryInfo &TLI; + DominatorTree *DT = nullptr; + PostDominatorTree *PDT = nullptr; + AAResults *AA = nullptr; + LoopInfo *LI = nullptr; + Type *IntptrTy; FunctionCallee TsanFuncEntry; FunctionCallee TsanFuncExit; @@ -174,6 +342,416 @@ struct ThreadSanitizer { FunctionCallee MemmoveFn, MemcpyFn, MemsetFn; }; +//----------------------------------------------------------------------------- +// DominanceBasedElimination Implementation +//----------------------------------------------------------------------------- + +void DominanceBasedElimination::buildBlockSafetyCache(Function &F) { + // Reserve to reduce rehashing for a typical case. 
+ BSC.DangerIdxInBB.reserve(F.size()); + BSC.HasDangerInBB.reserve(F.size()); + BSC.DangerIdxInBBPostDom.reserve(F.size()); + BSC.HasDangerInBBPostDom.reserve(F.size()); + + for (BasicBlock &BB : F) { + SmallVector<unsigned, 4> Danger; + SmallVector<unsigned, 4> DangerForPostDom; + unsigned Idx = 0; + for (Instruction &I : BB) { + if (!isInstrSafe(&I)) + Danger.push_back(Idx); + if (!isInstrSafeForPostDom(&I)) + DangerForPostDom.push_back(Idx); + BSC.IndexInBB[&I] = Idx++; + } + BSC.HasDangerInBB[&BB] = !Danger.empty(); + // Already in order by linear scan. + BSC.DangerIdxInBB[&BB] = std::move(Danger); + + BSC.HasDangerInBBPostDom[&BB] = !DangerForPostDom.empty(); + + // Additional check for postdom: if the path contains loops + if (LI.getLoopFor(&BB) != nullptr) { + BSC.HasDangerInBBPostDom[&BB] = true; + DangerForPostDom.push_back(BB.size() - 1); + } + BSC.DangerIdxInBBPostDom[&BB] = std::move(DangerForPostDom); + } +} + +// Check that suffix (after index FromIdx) in the BB contains no dangerous +// instruction. +bool DominanceBasedElimination::suffixSafe( + const BasicBlock *BB, unsigned FromIdx, + const DenseMap<const BasicBlock *, SmallVector<unsigned, 4>> &DangerIdxInBB) + const { + const auto It = DangerIdxInBB.find(BB); + if (It == DangerIdxInBB.end() || It->second.empty()) + return true; + const auto &DangerIdx = It->second; + // First dangerous index >= FromIdx? + const auto LB = std::lower_bound(DangerIdx.begin(), DangerIdx.end(), FromIdx); + return LB == DangerIdx.end(); +} + +// Check that prefix (before index ToIdx) of the BB contains no dangerous +// instruction. +bool DominanceBasedElimination::prefixSafe( + const BasicBlock *BB, unsigned ToIdx, + const DenseMap<const BasicBlock *, SmallVector<unsigned, 4>> &DangerIdxInBB) + const { + const auto It = DangerIdxInBB.find(BB); + if (It == DangerIdxInBB.end() || It->second.empty()) + return true; + const auto &DangerIdx = It->second; + // Any dangerous index < ToIdx? 
+ const auto LB = std::lower_bound(DangerIdx.begin(), DangerIdx.end(), ToIdx); + return LB == DangerIdx.begin(); +} + +bool DominanceBasedElimination::intervalSafeSameBB( + const BasicBlock *BB, unsigned FromIdx, unsigned ToExclusiveIdx, + const DenseMap<const BasicBlock *, SmallVector<unsigned, 4>> &DangerIdxInBB) + const { + const auto It = DangerIdxInBB.find(BB); + if (It == DangerIdxInBB.end() || It->second.empty()) + return true; + const auto &DangerIdx = It->second; + const auto LB = std::lower_bound(DangerIdx.begin(), DangerIdx.end(), FromIdx); + if (LB == DangerIdx.end()) + return true; + return *LB >= ToExclusiveIdx; +} + +bool isTsanAtomic(const Instruction *I) { + // TODO: Ask TTI whether synchronization scope is between threads. + auto SSID = getAtomicSyncScopeID(I); + if (!SSID) + return false; + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + return *SSID != SyncScope::SingleThread; + return true; +} + +bool DominanceBasedElimination::isInstrSafe(const Instruction *Inst) { + // Atomic operations with inter-thread communication are the primary + // source of synchronization and are never safe. + if (isTsanAtomic(Inst)) + return false; + + // Check function calls, if it's known to be sync-free + if (const auto *CB = dyn_cast<CallBase>(Inst)) { + if (const Function *Callee = CB->getCalledFunction()) + return Callee->hasNoSync(); + return false; + } + // All other instructions are considered safe because they do not, + // by themselves, create happens-before relationships + return true; +} + +bool DominanceBasedElimination::isInstrSafeForPostDom(const Instruction *I) { + // Irregular exits (e.g. return, abort, exceptions) and function calls + // (potential infinite loops) make post-dominance elimination unsafe. + if (isa<ReturnInst>(I) || isa<ResumeInst>(I)) + return false; + + if (const auto *CB = dyn_cast<CallBase>(I)) { + // Intrinsics are generally safe (no loops/exits hidden inside). 
+ if (isa<IntrinsicInst>(CB)) + return true; + + if (const Function *Callee = CB->getCalledFunction()) { + if (Callee->hasFnAttribute(Attribute::WillReturn) && + Callee->hasFnAttribute(Attribute::NoUnwind)) + return true; + } + return false; + } + return true; +} + +SmallPtrSet<const BasicBlock *, 32> +DominanceBasedElimination::buildCanReachEnd(const BasicBlock *EndBB) { + // Check the cache first. + if (const auto CachedIt = BSC.ReachableToEnd.find(EndBB); + CachedIt != BSC.ReachableToEnd.end()) + return CachedIt->second; + + // Reuse VisitedSet as the reachability set. + Worklist.clear(); + CanReachSet.clear(); + + CanReachSet.insert(EndBB); + Worklist.push_back(EndBB); + while (!Worklist.empty()) { + const BasicBlock *BB = Worklist.back(); + Worklist.pop_back(); + for (const BasicBlock *Pred : predecessors(BB)) { + if (CanReachSet.insert(Pred).second) + Worklist.push_back(Pred); + } + } + + // Store in the cache and return a copy. + BSC.ReachableToEnd[EndBB] = CanReachSet; + return BSC.ReachableToEnd[EndBB]; +} + +std::pair<bool, bool> +DominanceBasedElimination::traverseReachableAndCheckSafety( + const BasicBlock *StartBB, const BasicBlock *EndBB, + const SmallPtrSetImpl<const BasicBlock *> &CanReachEnd) { + Worklist.clear(); + CanReachSet.clear(); + + auto enqueueNonVisited = [&](const BasicBlock *BB) { + if ((BB != EndBB) && CanReachSet.insert(BB).second) + Worklist.push_back(BB); + }; + + for (const BasicBlock *Succ : successors(StartBB)) { + if (CanReachEnd.count(Succ)) + enqueueNonVisited(Succ); + } + + bool DomSafety = true, PostDomSafety = true; + + while (!Worklist.empty()) { + const BasicBlock *BB = Worklist.pop_back_val(); + + // Post-dom safety: any intermediate BB that is part of a loop + // makes elimination unsafe (potential infinite loop). + if (!ClPostDomAggressive && PostDomSafety && + BSC.HasDangerInBBPostDom.lookup(BB)) + PostDomSafety = false; + + // Any dangerous instruction in an intermediate BB makes the path “dirty”. 
+ if (DomSafety && BSC.HasDangerInBB.lookup(BB)) + DomSafety = false; + + if (!DomSafety && !PostDomSafety) + break; + + for (const BasicBlock *Succ : successors(BB)) + if (CanReachEnd.contains(Succ)) + enqueueNonVisited(Succ); + } + return {DomSafety, PostDomSafety}; +} + +template <bool IsPostDom> +bool DominanceBasedElimination::isPathClear( + Instruction *StartInst, Instruction *EndInst, + const DominatorTreeBase<BasicBlock, IsPostDom> *DTBase) { + LLVM_DEBUG(dbgs() << "Checking path from " << *StartInst << " to " << *EndInst + << "\t(" << (IsPostDom ? "PostDom" : "Dom") << ")\n"); + const BasicBlock *StartBB = StartInst->getParent(); + const BasicBlock *EndBB = EndInst->getParent(); + + // Intra-block indices (used in either case). + const unsigned StartIdx = BSC.IndexInBB.lookup(StartInst); + const unsigned EndIdx = BSC.IndexInBB.lookup(EndInst); + + // Intra-BB: verify (StartInst; EndInst) is safe. + if (StartBB == EndBB) { + bool DomSafety = + intervalSafeSameBB(StartBB, StartIdx + 1, EndIdx, BSC.DangerIdxInBB); + if constexpr (IsPostDom) { + return DomSafety && intervalSafeSameBB(StartBB, StartIdx + 1, EndIdx, + BSC.DangerIdxInBBPostDom); + } + return DomSafety; + } + + // Quick local checks on edges. + bool DomSafety = suffixSafe(StartBB, StartIdx + 1, BSC.DangerIdxInBB) && + prefixSafe(EndBB, EndIdx, BSC.DangerIdxInBB); + if (!DomSafety) + return false; + if constexpr (IsPostDom) { + bool PostDomSafety = + suffixSafe(StartBB, StartIdx + 1, BSC.DangerIdxInBBPostDom) && + prefixSafe(EndBB, EndIdx, BSC.DangerIdxInBBPostDom); + if (!PostDomSafety) + return false; + } + + // Cone safety cache lookup. 
+ if (const auto OuterIt = BSC.ConeSafeCache.find(StartBB); + OuterIt != BSC.ConeSafeCache.end()) { + if (const auto InnerIt = OuterIt->second.find(EndBB); + InnerIt != OuterIt->second.end()) { + const auto &[DomSafe, PostDomSafe] = InnerIt->second; + if (IsPostDom) + return DomSafe && PostDomSafe; + return DomSafe; + } + } + + // Build the set of blocks that can reach EndBB (reverse traversal). + const auto CanReachEnd = buildCanReachEnd(EndBB); + + // Forward traversal from StartBB, restricted to the cone that reach EndBB. + const auto [DomSafe, PostDomSafe] = + traverseReachableAndCheckSafety(StartBB, EndBB, CanReachEnd); + BSC.ConeSafeCache[StartBB][EndBB] = {DomSafe, PostDomSafe}; + LLVM_DEBUG(dbgs() << "isPathClear (DomSafe): " << (DomSafe ? "true" : "false") + << "\nisPathClear (PostDomSafe): " + << (PostDomSafe ? "true" : "false") << "\n"); + if constexpr (IsPostDom) + return DomSafe && PostDomSafe; + return DomSafe; +} + +DenseMap<Instruction *, size_t> +DominanceBasedElimination::createInstrToIndexMap() const { + DenseMap<Instruction *, size_t> InstrToIndexMap; + InstrToIndexMap.reserve(AllInstr.size()); + for (size_t i = 0; i < AllInstr.size(); ++i) + InstrToIndexMap[AllInstr[i].Inst] = i; + return InstrToIndexMap; +} + +template <bool IsPostDom> +bool DominanceBasedElimination::findAndMarkDominatingInstr( + size_t i, const DominatorTreeBase<BasicBlock, IsPostDom> *DTBase, + const DenseMap<Instruction *, size_t> &InstrToIndexMap, + SmallVectorImpl<bool> &ToRemove) { + LLVM_DEBUG(dbgs() << "\nAnalyzing: " << *(AllInstr[i].Inst) << "\n"); + const InstructionInfo &CurrII = AllInstr[i]; + Instruction *CurrInst = CurrII.Inst; + const BasicBlock *CurrBB = CurrInst->getParent(); + + const DomTreeNode *CurrDTNode = DTBase->getNode(CurrBB); + if (!CurrDTNode) + return false; + + // Traverse up the dominator tree + for (const auto *IDomNode = CurrDTNode; IDomNode; + IDomNode = IDomNode->getIDom()) { + const BasicBlock *DomBB = IDomNode->getBlock(); + if (!DomBB) + 
break; + + // Look for a suitable dominating instrumented instruction in DomBB + auto StartIt = DomBB->begin(); + auto EndIt = DomBB->end(); + if (CurrBB == DomBB) { // We are at the same BB + if constexpr (IsPostDom) + StartIt = std::next(CurrInst->getIterator()); + else + EndIt = CurrInst->getIterator(); + } + + for (auto InstIt = StartIt; InstIt != EndIt; ++InstIt) { + const Instruction &PotentialDomInst = *InstIt; + LLVM_DEBUG(dbgs() << "PotentialDomInst: " << PotentialDomInst << "\n"); + + // Check if PotentialDomInst is dominating and instrumented + const auto It = InstrToIndexMap.find(&PotentialDomInst); + if (It == InstrToIndexMap.end() || ToRemove[It->second]) + continue; // Not found in AllInstr or already marked for removal + + const size_t DomIndex = It->second; + InstructionInfo &DomII = AllInstr[DomIndex]; + Instruction *DomInst = DomII.Inst; + + auto IsVolatile = [](const Instruction *I) { + if (const auto *L = dyn_cast<LoadInst>(I)) + return L->isVolatile(); + if (const auto *S = dyn_cast<StoreInst>(I)) + return S->isVolatile(); + return false; + }; + if (ClDistinguishVolatile && IsVolatile(DomInst)) + continue; + + if (AA.isMustAlias(MemoryLocation::get(CurrInst), + MemoryLocation::get(DomInst))) { + const bool CurrIsWrite = CurrII.isWriteOperation(); + const bool DomIsWrite = DomII.isWriteOperation(); + + // Check compatibility logic (DomInst covers CurrInst): + // 1. If DomInst is a 'write', it covers both read and write. + // 2. If DomInst is a 'read', it only covers a read. + if (DomIsWrite || !CurrIsWrite) { + // Check the path to/from CurrInst from/to DomInst + Instruction *PathStart = IsPostDom ? CurrInst : DomInst; + Instruction *PathEnd = IsPostDom ? 
DomInst : CurrInst; + + if (isPathClear<IsPostDom>(PathStart, PathEnd, DTBase)) { + LLVM_DEBUG(dbgs() + << "TSAN: Omitting instrumentation for: " << *CurrInst + << " ((post-)dominated and covered by: " << *DomInst + << ")\n"); + ToRemove[i] = true; + // Found a (post)dominator, move to the next Inst + return true; + } + } + } + } + } + return false; +} + +/// Eliminates redundant instrumentation based on (pre/post)dominance analysis. +/// \tparam IsPostDom If true, uses post-dominance; if false, uses dominance. +template <bool IsPostDom> void DominanceBasedElimination::eliminate() { + LLVM_DEBUG(dbgs() << "Starting " << (IsPostDom ? "post-" : "") + << "dominance-based analysis\n"); + if (AllInstr.empty()) + return; + + DominatorTreeBase<BasicBlock, IsPostDom> *DTBase; + if constexpr (IsPostDom) + DTBase = &PDT; + else + DTBase = &DT; + + SmallVector<bool, 16> ToRemove(AllInstr.size(), false); + unsigned RemovedCount = 0; + + // Create a map from Instruction* to its index in the AllInstr vector. + DenseMap<Instruction *, size_t> InstrToIndexMap = createInstrToIndexMap(); + + for (size_t i = 0; i < AllInstr.size(); ++i) { + if (ToRemove[i]) + continue; + + if (findAndMarkDominatingInstr<IsPostDom>(i, DTBase, InstrToIndexMap, + ToRemove)) + RemovedCount++; + } + + LLVM_DEBUG(dbgs() << "\nFinal list of instructions and their status\n"; + for (size_t i = 0; i < AllInstr.size(); ++i) dbgs() + << "[" << (ToRemove[i] ? 
"REMOVED" : "KEPT") << "]\t" + << *AllInstr[i].Inst << "\n"); + + if (RemovedCount > 0) { + LLVM_DEBUG(dbgs() << "\n=== Updating final instruction list ===\n" + << "Original size: " << AllInstr.size() << "\n" + << "Instructions to remove: " << RemovedCount << "\n" + << "Remaining instructions: " + << (AllInstr.size() - RemovedCount) << "\n"); + auto ToRemoveIter = ToRemove.begin(); + erase_if(AllInstr, + [&](const InstructionInfo &) { return *ToRemoveIter++; }); + + if constexpr (IsPostDom) + NumOmittedByPostDominance += RemovedCount; + else + NumOmittedByDominance += RemovedCount; + } + LLVM_DEBUG(dbgs() << "Dominance analysis complete\n"); +} + +//----------------------------------------------------------------------------- +// ThreadSanitizer Implementation +//----------------------------------------------------------------------------- + void insertModuleCtor(Module &M) { getOrCreateSanitizerCtorAndInitFunctions( M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{}, @@ -182,12 +760,25 @@ void insertModuleCtor(Module &M) { // time. 
Hook them into the global ctors list in that case: [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); }); } -} // namespace +} // namespace PreservedAnalyses ThreadSanitizerPass::run(Function &F, FunctionAnalysisManager &FAM) { - ThreadSanitizer TSan; - if (TSan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F))) + DominatorTree *DT = nullptr; + PostDominatorTree *PDT = nullptr; + AAResults *AA = nullptr; + LoopInfo *LI = nullptr; + + if (ClUseDominanceAnalysis) { + DT = &FAM.getResult<DominatorTreeAnalysis>(F); + PDT = &FAM.getResult<PostDominatorTreeAnalysis>(F); + AA = &FAM.getResult<AAManager>(F); + LI = &FAM.getResult<LoopAnalysis>(F); + } + + ThreadSanitizer TSan(FAM.getResult<TargetLibraryAnalysis>(F), DT, PDT, AA, + LI); + if (TSan.sanitizeFunction(F)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } @@ -224,12 +815,12 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) { std::string ByteSizeStr = utostr(ByteSize); std::string BitSizeStr = utostr(BitSize); SmallString<32> ReadName("__tsan_read" + ByteSizeStr); - TsanRead[i] = M.getOrInsertFunction(ReadName, Attr, IRB.getVoidTy(), - IRB.getPtrTy()); + TsanRead[i] = + M.getOrInsertFunction(ReadName, Attr, IRB.getVoidTy(), IRB.getPtrTy()); SmallString<32> WriteName("__tsan_write" + ByteSizeStr); - TsanWrite[i] = M.getOrInsertFunction(WriteName, Attr, IRB.getVoidTy(), - IRB.getPtrTy()); + TsanWrite[i] = + M.getOrInsertFunction(WriteName, Attr, IRB.getVoidTy(), IRB.getPtrTy()); SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr); TsanUnalignedRead[i] = M.getOrInsertFunction( @@ -258,8 +849,8 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) { UnalignedVolatileWriteName, Attr, IRB.getVoidTy(), IRB.getPtrTy()); SmallString<64> CompoundRWName("__tsan_read_write" + ByteSizeStr); - TsanCompoundRW[i] = M.getOrInsertFunction( - CompoundRWName, Attr, IRB.getVoidTy(), IRB.getPtrTy()); + 
TsanCompoundRW[i] = M.getOrInsertFunction(CompoundRWName, Attr, + IRB.getVoidTy(), IRB.getPtrTy()); SmallString<64> UnalignedCompoundRWName("__tsan_unaligned_read_write" + ByteSizeStr); @@ -277,7 +868,7 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) { // Args of type Ty need extension only when BitSize is 32 or less. using Idxs = std::vector<unsigned>; - Idxs Idxs2Or12 ((BitSize <= 32) ? Idxs({1, 2}) : Idxs({2})); + Idxs Idxs2Or12((BitSize <= 32) ? Idxs({1, 2}) : Idxs({2})); Idxs Idxs34Or1234((BitSize <= 32) ? Idxs({1, 2, 3, 4}) : Idxs({3, 4})); SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store"); TsanAtomicStore[i] = M.getOrInsertFunction( @@ -336,12 +927,10 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) { TLI.getAttrList(&Ctx, {0}, /*Signed=*/true, /*Ret=*/false, Attr), IRB.getVoidTy(), OrdTy); - MemmoveFn = - M.getOrInsertFunction("__tsan_memmove", Attr, IRB.getPtrTy(), - IRB.getPtrTy(), IRB.getPtrTy(), IntptrTy); - MemcpyFn = - M.getOrInsertFunction("__tsan_memcpy", Attr, IRB.getPtrTy(), - IRB.getPtrTy(), IRB.getPtrTy(), IntptrTy); + MemmoveFn = M.getOrInsertFunction("__tsan_memmove", Attr, IRB.getPtrTy(), + IRB.getPtrTy(), IRB.getPtrTy(), IntptrTy); + MemcpyFn = M.getOrInsertFunction("__tsan_memcpy", Attr, IRB.getPtrTy(), + IRB.getPtrTy(), IRB.getPtrTy(), IntptrTy); MemsetFn = M.getOrInsertFunction( "__tsan_memset", TLI.getAttrList(&Ctx, {1}, /*Signed=*/true, /*Ret=*/false, Attr), @@ -474,16 +1063,6 @@ void ThreadSanitizer::chooseInstructionsToInstrument( Local.clear(); } -static bool isTsanAtomic(const Instruction *I) { - // TODO: Ask TTI whether synchronization scope is between threads. 
- auto SSID = getAtomicSyncScopeID(I); - if (!SSID) - return false; - if (isa<LoadInst>(I) || isa<StoreInst>(I)) - return *SSID != SyncScope::SingleThread; - return true; -} - void ThreadSanitizer::InsertRuntimeIgnores(Function &F) { InstrumentationIRBuilder IRB(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHIIt()); @@ -495,8 +1074,7 @@ void ThreadSanitizer::InsertRuntimeIgnores(Function &F) { } } -bool ThreadSanitizer::sanitizeFunction(Function &F, - const TargetLibraryInfo &TLI) { +bool ThreadSanitizer::sanitizeFunction(Function &F) { // This is required to prevent instrumenting call to __tsan_init from within // the module constructor. if (F.getName() == kTsanModuleCtorName) @@ -514,9 +1092,9 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, initialize(*F.getParent(), TLI); SmallVector<InstructionInfo, 8> AllLoadsAndStores; - SmallVector<Instruction*, 8> LocalLoadsAndStores; - SmallVector<Instruction*, 8> AtomicAccesses; - SmallVector<Instruction*, 8> MemIntrinCalls; + SmallVector<Instruction *, 8> LocalLoadsAndStores; + SmallVector<Instruction *, 8> AtomicAccesses; + SmallVector<Instruction *, 8> MemIntrinCalls; bool Res = false; bool HasCalls = false; bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread); @@ -545,6 +1123,11 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL); } + if (ClUseDominanceAnalysis && DT && PDT && AA && LI) { + DominanceBasedElimination DBE(AllLoadsAndStores, *DT, *PDT, *AA, *LI); + DBE.run(); + } + // We have collected all loads and stores. // FIXME: many of these accesses do not need to be checked for races // (e.g. variables that do not escape, etc). 
@@ -668,16 +1251,27 @@ bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II, static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { uint32_t v = 0; switch (ord) { - case AtomicOrdering::NotAtomic: - llvm_unreachable("unexpected atomic ordering!"); - case AtomicOrdering::Unordered: [[fallthrough]]; - case AtomicOrdering::Monotonic: v = 0; break; - // Not specified yet: - // case AtomicOrdering::Consume: v = 1; break; - case AtomicOrdering::Acquire: v = 2; break; - case AtomicOrdering::Release: v = 3; break; - case AtomicOrdering::AcquireRelease: v = 4; break; - case AtomicOrdering::SequentiallyConsistent: v = 5; break; + case AtomicOrdering::NotAtomic: + llvm_unreachable("unexpected atomic ordering!"); + case AtomicOrdering::Unordered: + [[fallthrough]]; + case AtomicOrdering::Monotonic: + v = 0; + break; + // Not specified yet: + // case AtomicOrdering::Consume: v = 1; break; + case AtomicOrdering::Acquire: + v = 2; + break; + case AtomicOrdering::Release: + v = 3; + break; + case AtomicOrdering::AcquireRelease: + v = 4; + break; + case AtomicOrdering::SequentiallyConsistent: + v = 5; + break; } return IRB->getInt32(v); } @@ -693,20 +1287,15 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) { InstrumentationIRBuilder IRB(I); if (MemSetInst *M = dyn_cast<MemSetInst>(I)) { - Value *Cast1 = IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false); + Value *Cast1 = + IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false); Value *Cast2 = IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false); - IRB.CreateCall( - MemsetFn, - {M->getArgOperand(0), - Cast1, - Cast2}); + IRB.CreateCall(MemsetFn, {M->getArgOperand(0), Cast1, Cast2}); I->eraseFromParent(); } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) { - IRB.CreateCall( - isa<MemCpyInst>(M) ? 
MemcpyFn : MemmoveFn, - {M->getArgOperand(0), - M->getArgOperand(1), - IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)}); + IRB.CreateCall(isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn, + {M->getArgOperand(0), M->getArgOperand(1), + IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)}); I->eraseFromParent(); } return false; @@ -728,8 +1317,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { int Idx = getMemoryAccessFuncIndex(OrigTy, Addr, DL); if (Idx < 0) return false; - Value *Args[] = {Addr, - createOrdering(&IRB, LI->getOrdering())}; + Value *Args[] = {Addr, createOrdering(&IRB, LI->getOrdering())}; Value *C = IRB.CreateCall(TsanAtomicLoad[Idx], Args); Value *Cast = IRB.CreateBitOrPointerCast(C, OrigTy); I->replaceAllUsesWith(Cast); @@ -776,12 +1364,10 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Value *CmpOperand = - IRB.CreateBitOrPointerCast(CASI->getCompareOperand(), Ty); + IRB.CreateBitOrPointerCast(CASI->getCompareOperand(), Ty); Value *NewOperand = - IRB.CreateBitOrPointerCast(CASI->getNewValOperand(), Ty); - Value *Args[] = {Addr, - CmpOperand, - NewOperand, + IRB.CreateBitOrPointerCast(CASI->getNewValOperand(), Ty); + Value *Args[] = {Addr, CmpOperand, NewOperand, createOrdering(&IRB, CASI->getSuccessOrdering()), createOrdering(&IRB, CASI->getFailureOrdering())}; CallInst *C = IRB.CreateCall(TsanAtomicCAS[Idx], Args); @@ -793,7 +1379,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { } Value *Res = - IRB.CreateInsertValue(PoisonValue::get(CASI->getType()), OldVal, 0); + IRB.CreateInsertValue(PoisonValue::get(CASI->getType()), OldVal, 0); Res = IRB.CreateInsertValue(Res, Success, 1); I->replaceAllUsesWith(Res); @@ -817,8 +1403,8 @@ int ThreadSanitizer::getMemoryAccessFuncIndex(Type *OrigTy, Value *Addr, return -1; } uint32_t TypeSize = 
DL.getTypeStoreSizeInBits(OrigTy); - if (TypeSize != 8 && TypeSize != 16 && - TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { + if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 && + TypeSize != 128) { NumAccessesWithBadSize++; // Ignore all unusual sizes. return -1; @@ -826,4 +1412,4 @@ int ThreadSanitizer::getMemoryAccessFuncIndex(Type *OrigTy, Value *Addr, size_t Idx = llvm::countr_zero(TypeSize / 8); assert(Idx < kNumberOfAccessSizes); return Idx; -} +} \ No newline at end of file diff --git a/llvm/test/Instrumentation/ThreadSanitizer/dominance-elimination.ll b/llvm/test/Instrumentation/ThreadSanitizer/dominance-elimination.ll new file mode 100644 index 0000000000000..acc583c078111 --- /dev/null +++ b/llvm/test/Instrumentation/ThreadSanitizer/dominance-elimination.ll @@ -0,0 +1,540 @@ +; RUN: opt -passes=tsan -tsan-use-dominance-analysis < %s -S | FileCheck %s + +; This file contains tests for the TSan dominance-based optimization. +; We check that redundant instrumentation is removed when one access +; dominates/post-dominates another, and is NOT removed when the path between +; them is "dirty". 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; --- Global variables for testing (minimized to two) --- +@g1 = common global i32 0, align 4 +@g2 = common global i32 0, align 4 + +; --- External Function Declarations for Tests --- +declare void @some_external_call() +declare void @safe_func() #0 +declare void @external_check() + +; ============================================================================= +; INTRA-BLOCK DOMINANCE TESTS +; ============================================================================= + +define void @test_intra_block_write_write() nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + store i32 2, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @test_intra_block_write_write +; CHECK: call void @__tsan_write4(ptr @g1) +; The second write is dominated and should NOT be instrumented. +; CHECK-NOT: call void @__tsan_write4(ptr @g1) +; CHECK: ret void + +define void @test_intra_block_write_read() nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + %val = load i32, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @test_intra_block_write_read +; CHECK: call void @__tsan_write4(ptr @g1) +; The read is dominated and should NOT be instrumented. +; CHECK-NOT: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +define void @test_intra_block_read_read() nounwind uwtable sanitize_thread { +entry: + %val1 = load i32, ptr @g1, align 4 + %val2 = load i32, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @test_intra_block_read_read +; CHECK: call void @__tsan_read4(ptr @g1) +; The second read is dominated and should NOT be instrumented. 
+; CHECK-NOT: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +; ============================================================================= +; PATH CLEARNESS TESTS +; ============================================================================= + +define void @test_path_not_clear_call() nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + call void @some_external_call() + store i32 2, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @test_path_not_clear_call +; CHECK: call void @__tsan_write4(ptr @g1) +; An unsafe call makes the path dirty. Optimization must NOT trigger. +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: ret void + +define void @test_path_clear_safe_call() nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + call void @safe_func() + store i32 2, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @test_path_clear_safe_call +; CHECK: call void @__tsan_write4(ptr @g1) +; A safe intrinsic call should not block the optimization. 
+; CHECK-NOT: call void @__tsan_write4(ptr @g1) +; CHECK: ret void + +; ============================================================================= +; INTER-BLOCK DOMINANCE TESTS +; ============================================================================= + +define void @test_inter_block_dom(i1 %cond) nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + br i1 %cond, label %if.then, label %if.else +if.then: + store i32 2, ptr @g1, align 4 + br label %if.end +if.else: + store i32 3, ptr @g1, align 4 + br label %if.end +if.end: + ret void +} +; CHECK-LABEL: define void @test_inter_block_dom +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: if.then: +; CHECK-NOT: call void @__tsan_write4(ptr @g1) +; CHECK: if.else: +; CHECK-NOT: call void @__tsan_write4(ptr @g1) +; CHECK: ret void + +; ============================================================================= +; POST-DOMINANCE TESTS +; ============================================================================= + +define void @test_post_dom(i1 %cond) nounwind uwtable sanitize_thread { +entry: + br i1 %cond, label %if.then, label %if.else +if.then: + store i32 2, ptr @g1, align 4 + br label %if.end +if.else: + store i32 3, ptr @g1, align 4 + br label %if.end +if.end: + store i32 4, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @test_post_dom +; CHECK: if.then: +; CHECK-NOT: call void @__tsan_write4(ptr @g1) +; CHECK: if.else: +; CHECK-NOT: call void @__tsan_write4(ptr @g1) +; CHECK: if.end: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: ret void + +; ============================================================================= +; ALIAS ANALYSIS TESTS +; ============================================================================= + +; Simple alias analysis: no alias. 
+define void @test_no_alias() nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + store i32 2, ptr @g2, align 4 + ret void +} +; CHECK-LABEL: define void @test_no_alias +; CHECK: call void @__tsan_write4(ptr @g1) +; Different addresses. The optimization must NOT trigger. +; CHECK: call void @__tsan_write4(ptr @g2) +; CHECK: ret void + +; MustAlias via zero-index GEP (should eliminate) +define void @alias_mustalias_gep0() nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + %p = getelementptr i32, ptr @g1, i64 0 + %v = load i32, ptr %p, align 4 + ret void +} +; CHECK-LABEL: define void @alias_mustalias_gep0 +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK-NOT: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +; Different offsets within the same base object (should NOT eliminate) +@arr = common global [5 x i32] zeroinitializer, align 4 +define void @alias_different_offsets() nounwind uwtable sanitize_thread { +entry: + %p0 = getelementptr [5 x i32], ptr @arr, i64 0, i64 0 + %p1 = getelementptr [5 x i32], ptr @arr, i64 0, i64 1 + store i32 1, ptr %p0, align 4 + store i32 2, ptr %p1, align 4 + ret void +} +; CHECK-LABEL: define void @alias_different_offsets +; CHECK: call void @__tsan_write4( +; CHECK: call void @__tsan_write4( +; CHECK: ret void + +; Equal offsets within the same base object (should eliminate) +define void @alias_same_offsets() nounwind uwtable sanitize_thread { +entry: + %p0 = getelementptr [5 x i32], ptr @arr, i64 0, i64 1 + %p1 = getelementptr [5 x i32], ptr @arr, i64 0, i64 1 + store i32 1, ptr %p0, align 4 + store i32 2, ptr %p1, align 4 + ret void +} +; CHECK-LABEL: define void @alias_same_offsets +; CHECK: call void @__tsan_write4( +; CHECK-NOT: call void @__tsan_write4( +; CHECK: ret void + + +; MayAlias via phi of two globals (should NOT eliminate) +define void @alias_mayalias_phi(i1 %c) nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + br i1 
%c, label %A, label %B +A: + br label %join +B: + br label %join +join: + %p = phi ptr [ @g1, %A ], [ @g2, %B ] + %v = load i32, ptr %p, align 4 + ret void +} +; CHECK-LABEL: define void @alias_mayalias_phi +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: join: +; CHECK: call void @__tsan_read4( +; CHECK: ret void + +; Pointer round-trip via ptrtoint/inttoptr (typically breaks MustAlias) +; (should NOT eliminate) +define void @alias_ptr_roundtrip() nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + %i = ptrtoint ptr @g1 to i64 + %p2 = inttoptr i64 %i to ptr + %v = load i32, ptr %p2, align 4 + ret void +} +; CHECK-LABEL: define void @alias_ptr_roundtrip +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: call void @__tsan_read4( +; CHECK: ret void + +; Bitcast-based MustAlias (i32* <-> i8*) (should eliminate) +define void @alias_bitcast_i8_i32() nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + %p8 = bitcast ptr @g1 to ptr + %v = load i32, ptr %p8, align 4 + ret void +} +; CHECK-LABEL: define void @alias_bitcast_i8_i32 +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK-NOT: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +; GEP with folded zero offset is MustAlias: (%n - %n) -> 0 +define void @alias_gep_folded_zero(i64 %n) nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + %t = sub i64 %n, %n + %p = getelementptr i32, ptr @g1, i64 %t + %v = load i32, ptr %p, align 4 + ret void +} +; CHECK-LABEL: define void @alias_gep_folded_zero +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK-NOT: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +define void @alias_select_same_ptr(i1 %c) nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + %p = select i1 %c, ptr @g1, ptr @g1 + %v = load i32, ptr %p, align 4 + ret void +} +; CHECK-LABEL: define void @alias_select_same_ptr +; CHECK: entry: 
+; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK-NOT: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +; ============================================================================= +; BRANCHING WITH MULTIPLE PATHS (one path dirty) +; ============================================================================= + +; Case A: inter-BB with a diamond where one branch is dirty. +; Path entry -> then (unsafe) -> merge, and entry -> else (safe) -> merge. +define void @multi_path_inter_dirty(i1 %cond) nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + br i1 %cond, label %then, label %else + +then: + call void @some_external_call() + br label %merge + +else: + call void @safe_func() + br label %merge + +merge: + %v = load i32, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @multi_path_inter_dirty +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; Dirty along one path => must instrument at merge. +; CHECK: merge: +; CHECK: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +; Case B: inter-BB where both branches are safe (no dangerous instr). Should eliminate. +define void @multi_path_inter_clean(i1 %cond) nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + br i1 %cond, label %then, label %else + +then: + call void @safe_func() + br label %merge + +else: + call void @safe_func() + br label %merge + +merge: + %v = load i32, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @multi_path_inter_clean +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; Both paths clean => dominated read at merge should be removed. +; CHECK: merge: +; CHECK-NOT: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +; ============================================================================= +; MIXED: intra-BB safe suffix vs. 
inter-BB dirty path +; ============================================================================= +define void @mixed_intra_inter(i1 %cond) nounwind uwtable sanitize_thread { +entry: + ; intra-BB suffix between store and next store is safe (no calls) + store i32 1, ptr @g1, align 4 + store i32 2, ptr @g1, align 4 + br i1 %cond, label %dirty, label %clean + +dirty: + ; dangerous call on one path + call void @some_external_call() + br label %merge + +clean: + ; safe on other path + call void @safe_func() + br label %merge + +merge: + ; must keep because one incoming path is dirty + store i32 3, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @mixed_intra_inter +; First store instruments. +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; Second store in same BB is dominated by the first and safe => removed. +; CHECK-NOT: call void @__tsan_write4(ptr @g1) +; Final store must remain due to dirty path. +; CHECK: merge: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: ret void + +; ============================================================================= +; POST-DOM with dirty suffix at start BB blocks elimination (renamed BBs) +; ============================================================================= +define void @postdom_dirty_start_suffix(i1 %cond) nounwind uwtable sanitize_thread { +entry: + ; Initial write + store i32 1, ptr @g1, align 4 + ; Dirty suffix in the start block blocks elimination + call void @some_external_call() + br i1 %cond, label %path_then, label %path_else + +path_then: + br label %merge + +path_else: + br label %merge + +merge: + ; Despite post-dominance, path is not clear due to dirty suffix in entry + %v = load i32, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @postdom_dirty_start_suffix +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: call void @some_external_call() +; CHECK: merge: +; CHECK: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +; 
============================================================================= +; DIRTY PREFIX IN END BB blocks elimination (prefixSafe) +; ============================================================================= +define void @dirty_prefix_in_end_bb() nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + br label %end + +end: + ; Dirty prefix in the end block before the target access + call void @some_external_call() + %v = load i32, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @dirty_prefix_in_end_bb +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: end: +; CHECK: call void @some_external_call() +; CHECK: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +; ============================================================================= +; IRRELEVANT DIRTY PATH NOT REACHING EndBB should not block elimination +; ============================================================================= +define void @dirty_unrelated_cone(i1 %cond) nounwind uwtable sanitize_thread { +entry: + store i32 1, ptr @g1, align 4 + br i1 %cond, label %to_end, label %to_dead + +to_end: + br label %end + +to_dead: + ; Dirty path that does NOT reach %end at all + call void @some_external_call() + br label %dead + +dead: + ret void + +end: + %v = load i32, ptr @g1, align 4 + ret void +} +; The dirty path is outside the cone to %end, so read can be eliminated. +; CHECK-LABEL: define void @dirty_unrelated_cone +; CHECK: entry: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: end: +; CHECK-NOT: call void @__tsan_read4(ptr @g1) +; CHECK: ret void + +; ============================================================================= +; POST-DOMINANCE WITH LOOP +; ============================================================================= +define void @postdom_loop() nounwind uwtable sanitize_thread { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %call = call i32 (...) 
@external_check() + %tobool = icmp ne i32 %call, 0 + br i1 %tobool, label %while.body, label %while.end + +while.body: ; preds = %while.cond + store i32 1, ptr @g1, align 4 + br label %while.cond + +while.end: ; preds = %while.cond + store i32 2, ptr @g1, align 4 + ret void +} +; It's a potentially infinite loop, +; so the store in while.end should not be eliminated. +; CHECK-LABEL: define void @postdom_loop +; CHECK: while.body: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: while.end: +; CHECK: call void @__tsan_write4(ptr @g1) + +; ============================================================================= +; POST-DOMINANCE BLOCKED BY POTENTIAL INFINITE LOOP (CallBase check) +; ============================================================================= + +declare void @dom_safe_but_postdom_unsafe() #0 + +define void @test_post_dom_blocked_by_readnone_call(i1 %cond) nounwind uwtable sanitize_thread { +entry: + br i1 %cond, label %if.then, label %if.else +if.then: + ; dominated by neither entry nor if.end (in terms of domination elimination flow) + ; but post-dominated by if.end + store i32 1, ptr @g1, align 4 + call void @dom_safe_but_postdom_unsafe() + br label %if.end +if.else: + br label %if.end +if.end: + store i32 2, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @test_post_dom_blocked_by_readnone_call +; CHECK: if.then: +; The call is readnone (safe for sync) but not an intrinsic (unsafe for termination). +; So the first write must NOT be eliminated (post-dominance is blocked). 
+; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: call void @dom_safe_but_postdom_unsafe() +; CHECK: if.end: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: ret void + +declare void @postdom_safe_func() #1 + +define void @test_post_dom_allowed_by_postdome_safe_func(i1 %cond) nounwind uwtable sanitize_thread { +entry: + br i1 %cond, label %if.then, label %if.else +if.then: + store i32 1, ptr @g1, align 4 + call void @postdom_safe_func() + br label %if.end +if.else: + br label %if.end +if.end: + store i32 2, ptr @g1, align 4 + ret void +} +; CHECK-LABEL: define void @test_post_dom_allowed_by_postdome_safe_func +; CHECK: if.then: +; CHECK-NOT: call void @__tsan_write4(ptr @g1) +; CHECK: call void @postdom_safe_func() +; CHECK: if.end: +; CHECK: call void @__tsan_write4(ptr @g1) +; CHECK: ret void + +; Attributes for the "safe" function +attributes #0 = { nosync } +attributes #1 = { nosync willreturn nounwind } >From 639e24d43476e9761f036feaa0e2fb5b9f1d81d1 Mon Sep 17 00:00:00 2001 From: Alexey Paznikov <[email protected]> Date: Tue, 27 Jan 2026 18:03:28 +0800 Subject: [PATCH 2/2] [TSan] Enable Dominance elimination by default and address review feedback Prepare the Dominance-based elimination pass for upstreaming: * Enable the optimization by default (-tsan-use-dominance-analysis=true). * Add a detailed comment block explaining the dominance and post-dominance logic, as well as the strict safety guarantees. * Remove the experimental aggressive post-dominance logic entirely to keep the implementation strictly sound and simple. * Clean up custom lit test configurations. * Explicitly disable the optimization in `capture-no-omit.ll` and `read_before_write.ll` to preserve existing test coverage. * Include a small drive-by fix for `sanitizer-ld.c` to prevent test failures when the build directory path contains the substring "tsan". 
--- clang/test/Driver/sanitizer-ld.c | 7 +- compiler-rt/test/tsan/CMakeLists.txt | 115 +++++------------- compiler-rt/test/tsan/lit.cfg.py | 10 -- compiler-rt/test/tsan/lit.site.cfg.py.in | 3 - .../Instrumentation/ThreadSanitizer.cpp | 13 +- .../ThreadSanitizer/capture-no-omit.ll | 2 +- .../ThreadSanitizer/read_before_write.ll | 8 +- 7 files changed, 47 insertions(+), 111 deletions(-) diff --git a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c index 53a138e31b801..327a0d314c3d7 100644 --- a/clang/test/Driver/sanitizer-ld.c +++ b/clang/test/Driver/sanitizer-ld.c @@ -1376,7 +1376,8 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | %{filecheck} --check-prefix=CHECK-RELOCATABLE-LINK-ASAN-UBSAN-RTLIB // -// CHECK-RELOCATABLE-LINK-ASAN-UBSAN-RTLIB-NOT: "{{.*}}(asan|ubsan){{.*}}" +// CHECK-RELOCATABLE-LINK-ASAN-UBSAN-RTLIB-NOT: libclang_rt.asan +// CHECK-RELOCATABLE-LINK-ASAN-UBSAN-RTLIB-NOT: libclang_rt.ubsan // RUN: %clang -fsanitize=address -r -fsanitize-link-runtime -### %s 2>&1 \ // RUN: --target=x86_64-unknown-linux -fuse-ld=ld \ @@ -1384,7 +1385,7 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck %s --check-prefix=CHECK-RELOCATABLE-LINK-FORCE-LINK-ASAN // -// CHECK-RELOCATABLE-LINK-FORCE-LINK-ASAN: "{{.*}}asan{{.*}}" +// CHECK-RELOCATABLE-LINK-FORCE-LINK-ASAN: libclang_rt.asan // RUN: %clang -fsanitize=thread -r -### %s 2>&1 \ // RUN: --target=x86_64-unknown-linux -fuse-ld=ld \ @@ -1392,7 +1393,7 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | %{filecheck} --check-prefix=CHECK-RELOCATABLE-LINK-TSAN-RTLIB // -// CHECK-RELOCATABLE-LINK-TSAN-RTLIB-NOT: "{{.*}}tsan{{.*}}" +// CHECK-RELOCATABLE-LINK-TSAN-RTLIB-NOT: libclang_rt.tsan // RUN: %clang -fsanitize=fuzzer,address -shared-libsan -### %s 2>&1 \ // RUN: --target=x86_64-unknown-linux -fuse-ld=ld \ diff --git a/compiler-rt/test/tsan/CMakeLists.txt b/compiler-rt/test/tsan/CMakeLists.txt index 7cf0d19efb039..4cd88507a2baf 100644 --- 
a/compiler-rt/test/tsan/CMakeLists.txt +++ b/compiler-rt/test/tsan/CMakeLists.txt @@ -18,7 +18,6 @@ endif() set(TSAN_DYNAMIC_TEST_DEPS ${TSAN_TEST_DEPS}) set(TSAN_TESTSUITES) set(TSAN_DYNAMIC_TESTSUITES) -set(TSAN_ENABLE_DOMINANCE_ANALYSIS "False") # Disable dominance analysis by default if (NOT DEFINED TSAN_TEST_DEFLAKE_THRESHOLD) set(TSAN_TEST_DEFLAKE_THRESHOLD "10") @@ -29,79 +28,47 @@ if(APPLE) darwin_filter_host_archs(TSAN_SUPPORTED_ARCH TSAN_TEST_ARCH) endif() -# Unified function for generating TSAN test suites by architectures. -# Arguments: -# OUT_LIST_VAR - name of output list (for example, TSAN_TESTSUITES or TSAN_DOM_TESTSUITES) -# SUFFIX_KIND - string added to config suffix after "-${arch}" (for example, "" or "-dominance") -# CONFIG_KIND - string added to config name after "Config" (for example, "" or "Dominance") -# ENABLE_DOM - "True"/"False" enable dominance analysis -function(tsan_generate_arch_suites OUT_LIST_VAR SUFFIX_KIND CONFIG_KIND ENABLE_DOM) - foreach(arch ${TSAN_TEST_ARCH}) - set(TSAN_ENABLE_DOMINANCE_ANALYSIS "${ENABLE_DOM}") +foreach(arch ${TSAN_TEST_ARCH}) + set(TSAN_TEST_APPLE_PLATFORM "osx") + set(TSAN_TEST_MIN_DEPLOYMENT_TARGET_FLAG "${DARWIN_osx_MIN_VER_FLAG}") + set(TSAN_TEST_APPLE_TARGET_IS_HOST ON) + pythonize_bool(TSAN_TEST_APPLE_TARGET_IS_HOST) - set(TSAN_TEST_APPLE_PLATFORM "osx") - set(TSAN_TEST_MIN_DEPLOYMENT_TARGET_FLAG "${DARWIN_osx_MIN_VER_FLAG}") - set(TSAN_TEST_APPLE_TARGET_IS_HOST ON) - pythonize_bool(TSAN_TEST_APPLE_TARGET_IS_HOST) + set(TSAN_TEST_TARGET_ARCH ${arch}) + string(TOLOWER "-${arch}" TSAN_TEST_CONFIG_SUFFIX) + get_test_cc_for_arch(${arch} TSAN_TEST_TARGET_CC TSAN_TEST_TARGET_CFLAGS) - set(TSAN_TEST_TARGET_ARCH ${arch}) - string(TOLOWER "-${arch}${SUFFIX_KIND}" TSAN_TEST_CONFIG_SUFFIX) - get_test_cc_for_arch(${arch} TSAN_TEST_TARGET_CC TSAN_TEST_TARGET_CFLAGS) + string(REPLACE ";" " " LIBDISPATCH_CFLAGS_STRING " ${COMPILER_RT_TEST_LIBDISPATCH_CFLAGS}") + string(APPEND TSAN_TEST_TARGET_CFLAGS 
${LIBDISPATCH_CFLAGS_STRING}) - string(REPLACE ";" " " LIBDISPATCH_CFLAGS_STRING " ${COMPILER_RT_TEST_LIBDISPATCH_CFLAGS}") - string(APPEND TSAN_TEST_TARGET_CFLAGS ${LIBDISPATCH_CFLAGS_STRING}) - - if (COMPILER_RT_HAS_MSSE4_2_FLAG) - string(APPEND TSAN_TEST_TARGET_CFLAGS " -msse4.2 ") - endif() + if (COMPILER_RT_HAS_MSSE4_2_FLAG) + string(APPEND TSAN_TEST_TARGET_CFLAGS " -msse4.2 ") + endif() - string(TOUPPER ${arch} ARCH_UPPER_CASE) - set(CONFIG_NAME ${ARCH_UPPER_CASE}Config${CONFIG_KIND}) + string(TOUPPER ${arch} ARCH_UPPER_CASE) + set(CONFIG_NAME ${ARCH_UPPER_CASE}Config) - configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py ) - list(APPEND ${OUT_LIST_VAR} ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + list(APPEND TSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) - if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) - # Dynamic runtime for corresponding variant - if("${SUFFIX_KIND}" STREQUAL "") - string(TOLOWER "-${arch}-${OS_NAME}-dynamic" TSAN_TEST_CONFIG_SUFFIX) - set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig${CONFIG_KIND}) - list(APPEND TSAN_DYNAMIC_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) - else() - string(TOLOWER "-${arch}-${OS_NAME}-dynamic${SUFFIX_KIND}" TSAN_TEST_CONFIG_SUFFIX) - set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig${CONFIG_KIND}) - # Track dynamic dominance-analysis suites separately for a dedicated target. 
- list(APPEND TSAN_DOM_DYNAMIC_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) - endif() - configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py + if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) + string(TOLOWER "-${arch}-${OS_NAME}-dynamic" TSAN_TEST_CONFIG_SUFFIX) + set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig) + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py ) - list(APPEND ${OUT_LIST_VAR} ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) - endif() - endforeach() - - # Propagate the assembled list to the parent scope - set(${OUT_LIST_VAR} "${${OUT_LIST_VAR}}" PARENT_SCOPE) - if(DEFINED TSAN_DOM_DYNAMIC_TESTSUITES) - set(TSAN_DOM_DYNAMIC_TESTSUITES "${TSAN_DOM_DYNAMIC_TESTSUITES}" PARENT_SCOPE) + list(APPEND TSAN_DYNAMIC_TESTSUITES + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) endif() -endfunction() - -# Default configuration -set(TSAN_TESTSUITES) -tsan_generate_arch_suites(TSAN_TESTSUITES "" "" "False") - -# Enable dominance analysis (check-tsan-dominance-analysis target) -set(TSAN_DOM_TESTSUITES) -tsan_generate_arch_suites(TSAN_DOM_TESTSUITES "-dominance" "Dominance" "True") +endforeach() # iOS and iOS simulator test suites # These are not added into "check-all", in order to run these tests, use @@ -161,10 +128,6 @@ list(APPEND TSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit) if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) list(APPEND TSAN_DYNAMIC_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit/dynamic) endif() -list(APPEND TSAN_DOM_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit) -if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) - list(APPEND TSAN_DOM_DYNAMIC_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit/dynamic) -endif() add_lit_testsuite(check-tsan "Running ThreadSanitizer tests" ${TSAN_TESTSUITES} @@ 
-177,17 +140,3 @@ if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) EXCLUDE_FROM_CHECK_ALL DEPENDS ${TSAN_DYNAMIC_TEST_DEPS}) endif() - -add_lit_testsuite(check-tsan-dominance-analysis "Running ThreadSanitizer tests (dominance analysis)" - ${TSAN_DOM_TESTSUITES} - DEPENDS ${TSAN_TEST_DEPS}) -set_target_properties(check-tsan-dominance-analysis PROPERTIES FOLDER "Compiler-RT Tests") - -# New target: dynamic + dominance analysis -if(COMPILER_RT_TSAN_HAS_STATIC_RUNTIME) - add_lit_testsuite(check-tsan-dominance-analysis-dynamic "Running ThreadSanitizer tests (dynamic, dominance analysis)" - ${TSAN_DOM_DYNAMIC_TESTSUITES} - EXCLUDE_FROM_CHECK_ALL - DEPENDS ${TSAN_DYNAMIC_TEST_DEPS}) - set_target_properties(check-tsan-dominance-analysis-dynamic PROPERTIES FOLDER "Compiler-RT Tests") -endif() diff --git a/compiler-rt/test/tsan/lit.cfg.py b/compiler-rt/test/tsan/lit.cfg.py index 0e7dd5da36c23..8803a7bda9aa5 100644 --- a/compiler-rt/test/tsan/lit.cfg.py +++ b/compiler-rt/test/tsan/lit.cfg.py @@ -56,16 +56,6 @@ def get_required_attr(config, attr_name): + extra_cflags + ["-I%s" % tsan_incdir] ) - -# Setup dominance-based elimination if enabled -tsan_enable_dominance = ( - getattr(config, "tsan_enable_dominance_analysis", "False") == "True" -) -if tsan_enable_dominance: - config.name += " (dominance-analysis)" - dom_flags = ["-mllvm", "-tsan-use-dominance-analysis"] - clang_tsan_cflags += dom_flags - clang_tsan_cxxflags = ( config.cxx_mode_flags + clang_tsan_cflags + ["-std=c++11"] + ["-I%s" % tsan_incdir] ) diff --git a/compiler-rt/test/tsan/lit.site.cfg.py.in b/compiler-rt/test/tsan/lit.site.cfg.py.in index a7e48854c22bb..458da18a17147 100644 --- a/compiler-rt/test/tsan/lit.site.cfg.py.in +++ b/compiler-rt/test/tsan/lit.site.cfg.py.in @@ -10,9 +10,6 @@ config.target_cflags = "@TSAN_TEST_TARGET_CFLAGS@" config.target_arch = "@TSAN_TEST_TARGET_ARCH@" config.deflake_threshold = "@TSAN_TEST_DEFLAKE_THRESHOLD@" -# Enable dominance analysis. 
-config.tsan_enable_dominance_analysis = "@TSAN_ENABLE_DOMINANCE_ANALYSIS@" - # Load common config for all compiler-rt lit tests. lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured") diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index d787a0378639b..3ace6bf7d29d5 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -88,15 +88,15 @@ static cl::opt<bool> ClOmitNonCaptured("tsan-omit-by-pointer-capturing", cl::init(true), cl::desc("Omit accesses due to pointer capturing"), cl::Hidden); +// Eliminates redundant instrumentation based on (post-)dominance relationships +// within the control flow graph. If an access A (post-)dominates an access B +// targeting the same memory location, and there is no synchronization or calls +// between them, instrumenting A is sufficient to catch the data race. static cl::opt<bool> - ClUseDominanceAnalysis("tsan-use-dominance-analysis", cl::init(false), + ClUseDominanceAnalysis("tsan-use-dominance-analysis", cl::init(true), cl::desc("Eliminate duplicating instructions which " "(post)dominate given instruction"), cl::Hidden); -static cl::opt<bool> ClPostDomAggressive( - "tsan-postdom-aggressive", cl::init(false), - cl::desc("Allow post-dominance elimination across loops (unsafe)"), - cl::Hidden); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); @@ -522,8 +522,7 @@ DominanceBasedElimination::traverseReachableAndCheckSafety( // Post-dom safety: any intermediate BB that is part of a loop // makes elimination unsafe (potential infinite loop).
- if (!ClPostDomAggressive && PostDomSafety && - BSC.HasDangerInBBPostDom.lookup(BB) + if (PostDomSafety && BSC.HasDangerInBBPostDom.lookup(BB)) PostDomSafety = false; // Any dangerous instruction in an intermediate BB makes the path “dirty”. diff --git a/llvm/test/Instrumentation/ThreadSanitizer/capture-no-omit.ll b/llvm/test/Instrumentation/ThreadSanitizer/capture-no-omit.ll index cae04936002cd..f7d6eb56ba94f 100644 --- a/llvm/test/Instrumentation/ThreadSanitizer/capture-no-omit.ll +++ b/llvm/test/Instrumentation/ThreadSanitizer/capture-no-omit.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=tsan -tsan-omit-by-pointer-capturing=0 -S | FileCheck %s +; RUN: opt < %s -passes=tsan -tsan-omit-by-pointer-capturing=0 -tsan-use-dominance-analysis=0 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll b/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll index 1ef283d8f8f4c..7c2de00640785 100644 --- a/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll +++ b/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll @@ -1,7 +1,7 @@ -; RUN: opt < %s -passes=tsan -S | FileCheck --check-prefixes=CHECK,CHECK-OPT %s -; RUN: opt < %s -passes=tsan -tsan-instrument-read-before-write -S | FileCheck %s --check-prefixes=CHECK,CHECK-UNOPT -; RUN: opt < %s -passes=tsan -tsan-compound-read-before-write -S | FileCheck %s --check-prefixes=CHECK,CHECK-COMPOUND -; RUN: opt < %s -passes=tsan -tsan-distinguish-volatile -tsan-compound-read-before-write -S | FileCheck %s --check-prefixes=CHECK,CHECK-COMPOUND-VOLATILE +; RUN: opt < %s -passes=tsan -tsan-use-dominance-analysis=0 -S | FileCheck --check-prefixes=CHECK,CHECK-OPT %s +; RUN: opt < %s -passes=tsan -tsan-instrument-read-before-write -tsan-use-dominance-analysis=0 -S | FileCheck %s
--check-prefixes=CHECK,CHECK-UNOPT +; RUN: opt < %s -passes=tsan -tsan-compound-read-before-write -tsan-use-dominance-analysis=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-COMPOUND +; RUN: opt < %s -passes=tsan -tsan-distinguish-volatile -tsan-compound-read-before-write -tsan-use-dominance-analysis=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-COMPOUND-VOLATILE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
