Author: Florian Mayer Date: 2024-12-17T00:47:02-08:00 New Revision: 514580b43898921cc95659de47b383bd2c9b4b12
URL: https://github.com/llvm/llvm-project/commit/514580b43898921cc95659de47b383bd2c9b4b12 DIFF: https://github.com/llvm/llvm-project/commit/514580b43898921cc95659de47b383bd2c9b4b12.diff LOG: [MTE] Apply alignment / size in AsmPrinter rather than IR (#111918) This makes sure no optimizations are applied that assume the bigger alignment or size, which could be incorrect if we link together with non-instrumented code. Added: Modified: clang/lib/CodeGen/SanitizerMetadata.cpp clang/test/CodeGen/memtag-globals.cpp llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/lib/IR/Verifier.cpp llvm/lib/Target/AArch64/AArch64.h llvm/lib/Target/AArch64/AArch64TargetMachine.cpp llvm/lib/Target/AArch64/CMakeLists.txt llvm/test/CodeGen/AArch64/O0-pipeline.ll llvm/test/CodeGen/AArch64/O3-pipeline.ll llvm/unittests/IR/VerifierTest.cpp llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn Removed: llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp ################################################################################ diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp index c1a6b223480a19..cf4f74d1c50afa 100644 --- a/clang/lib/CodeGen/SanitizerMetadata.cpp +++ b/clang/lib/CodeGen/SanitizerMetadata.cpp @@ -31,6 +31,37 @@ static SanitizerMask expandKernelSanitizerMasks(SanitizerMask Mask) { return Mask; } +static bool shouldTagGlobal(const llvm::GlobalVariable &G) { + // For now, don't instrument constant data, as it'll be in .rodata anyway. It + // may be worth instrumenting these in future to stop them from being used as + // gadgets. + if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant()) + return false; + + // Globals can be placed implicitly or explicitly in sections. There's two + // diff erent types of globals that meet this criteria that cause problems: + // 1. Function pointers that are going into various init arrays (either + // explicitly through `__attribute__((section(<foo>)))` or implicitly + // through `__attribute__((constructor)))`, such as ".(pre)init(_array)", + // ".fini(_array)", ".ctors", and ".dtors". These function pointers end up + // overaligned and overpadded, making iterating over them problematic, and + // each function pointer is individually tagged (so the iteration over + // them causes SIGSEGV/MTE[AS]ERR). + // 2. Global variables put into an explicit section, where the section's name + // is a valid C-style identifier. The linker emits a `__start_<name>` and + // `__stop_<name>` symbol for the section, so that you can iterate over + // globals within this section. Unfortunately, again, these globals would + // be tagged and so iteration causes SIGSEGV/MTE[AS]ERR. + // + // To mitigate both these cases, and because specifying a section is rare + // outside of these two cases, disable MTE protection for globals in any + // section. + if (G.hasSection()) + return false; + + return true; +} + void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV, SourceLocation Loc, StringRef Name, QualType Ty, @@ -57,11 +88,15 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV, Meta.NoHWAddress |= CGM.isInNoSanitizeList( FsanitizeArgument.Mask & SanitizerKind::HWAddress, GV, Loc, Ty); - Meta.Memtag |= - static_cast<bool>(FsanitizeArgument.Mask & SanitizerKind::MemtagGlobals); - Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag); - Meta.Memtag &= !CGM.isInNoSanitizeList( - FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty); + if (shouldTagGlobal(*GV)) { + Meta.Memtag |= static_cast<bool>(FsanitizeArgument.Mask & + SanitizerKind::MemtagGlobals); + Meta.Memtag &= !NoSanitizeAttrSet.hasOneOf(SanitizerKind::MemTag); + Meta.Memtag &= !CGM.isInNoSanitizeList( + FsanitizeArgument.Mask & SanitizerKind::MemTag, GV, Loc, Ty); + } else { + Meta.Memtag = false; + } Meta.IsDynInit = IsDynInit && !Meta.NoAddress && FsanitizeArgument.has(SanitizerKind::Address) && diff --git a/clang/test/CodeGen/memtag-globals.cpp b/clang/test/CodeGen/memtag-globals.cpp index ae2d32ae8a56d9..0d7fe22a7b0f07 100644 --- a/clang/test/CodeGen/memtag-globals.cpp +++ b/clang/test/CodeGen/memtag-globals.cpp @@ -10,6 +10,7 @@ // RUN: FileCheck %s --check-prefix=IGNORELIST int global; +int __attribute__((__section__("my_section"))) section_global; int __attribute__((no_sanitize("memtag"))) attributed_global; int __attribute__((disable_sanitizer_instrumentation)) disable_instrumentation_global; int ignorelisted_global; @@ -24,6 +25,8 @@ void func() { // CHECK: @{{.*}}extra_global{{.*}} ={{.*}} sanitize_memtag // CHECK: @{{.*}}global{{.*}} ={{.*}} sanitize_memtag +// CHECK: @{{.*}}section_global{{.*}} = +// CHECK-NOT: sanitize_memtag // CHECK: @{{.*}}attributed_global{{.*}} = // CHECK-NOT: sanitize_memtag // CHECK: @{{.*}}disable_instrumentation_global{{.*}} = @@ -32,7 +35,7 @@ void func() { // CHECK-NOT: sanitize_memtag // CHECK: @{{.*}}static_var{{.*}} ={{.*}} sanitize_memtag -// CHECK: @{{.*}} = {{.*}} c"Hello, world!\00"{{.*}} sanitize_memtag +// CHECK: @{{.*}} = {{.*}} c"Hello, world!\00"{{.*}} // CHECK: @{{.*}}external_global{{.*}} ={{.*}} sanitize_memtag // IGNORELIST: @{{.*}}extra_global{{.*}} ={{.*}} sanitize_memtag diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 575ef10d9bbdef..a1f5e5d208ffba 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2406,12 +2406,50 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) { OutStreamer->emitBinaryData(Buf); } +static void tagGlobalDefinition(Module &M, GlobalVariable *G) { + Constant *Initializer = G->getInitializer(); + uint64_t SizeInBytes = + M.getDataLayout().getTypeAllocSize(Initializer->getType()); + + uint64_t NewSize = alignTo(SizeInBytes, 16); + if (SizeInBytes != NewSize) { + // Pad the initializer out to the next multiple of 16 bytes. + llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0); + Constant *Padding = ConstantDataArray::get(M.getContext(), Init); + Initializer = ConstantStruct::getAnon({Initializer, Padding}); + auto *NewGV = new GlobalVariable( + M, Initializer->getType(), G->isConstant(), G->getLinkage(), + Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace()); + NewGV->copyAttributesFrom(G); + NewGV->setComdat(G->getComdat()); + NewGV->copyMetadata(G, 0); + + NewGV->takeName(G); + G->replaceAllUsesWith(NewGV); + G->eraseFromParent(); + G = NewGV; + } + + if (G->getAlign().valueOrOne() < 16) + G->setAlignment(Align(16)); + + // Ensure that tagged globals don't get merged by ICF - as they should have + // diff erent tags at runtime. + G->setUnnamedAddr(GlobalValue::UnnamedAddr::None); +} + bool AsmPrinter::doFinalization(Module &M) { // Set the MachineFunction to nullptr so that we can catch attempted // accesses to MF specific features at the module level and so that // we can conditionalize accesses based on whether or not it is nullptr. MF = nullptr; + for (GlobalVariable &G : make_early_inc_range(M.globals())) { + if (G.isDeclaration() || !G.isTagged()) + continue; + tagGlobalDefinition(M, &G); + } + // Gather all GOT equivalent globals in the module. We really need two // passes over the globals: one to compute and another to avoid its emission // in EmitGlobalVariable, otherwise we would not be able to handle cases diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 55de486e90e190..48e27763017be9 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -808,6 +808,10 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) { "visibility must be dso_local!", &GV); + if (GV.isTagged()) { + Check(!GV.hasSection(), "tagged GlobalValue must not be in section.", &GV); + } + forEachUser(&GV, GlobalValueVisited, [&](const Value *V) -> bool { if (const Instruction *I = dyn_cast<Instruction>(V)) { if (!I->getParent() || !I->getParent()->getParent()) diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h index 62fbf94e803f0c..ffa578d412b3c2 100644 --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -72,7 +72,6 @@ FunctionPass *createAArch64PostLegalizerLowering(); FunctionPass *createAArch64PostSelectOptimize(); FunctionPass *createAArch64StackTaggingPass(bool IsOptNone); FunctionPass *createAArch64StackTaggingPreRAPass(); -ModulePass *createAArch64GlobalsTaggingPass(); ModulePass *createAArch64Arm64ECCallLoweringPass(); void initializeAArch64A53Fix835769Pass(PassRegistry&); @@ -89,7 +88,6 @@ void initializeAArch64ConditionalComparesPass(PassRegistry &); void initializeAArch64DAGToDAGISelLegacyPass(PassRegistry &); void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&); void initializeAArch64ExpandPseudoPass(PassRegistry &); -void initializeAArch64GlobalsTaggingPass(PassRegistry &); void initializeAArch64LoadStoreOptPass(PassRegistry&); void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &); void initializeAArch64MIPeepholeOptPass(PassRegistry &); diff --git a/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp b/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp deleted file mode 100644 index a49d391d9148c3..00000000000000 --- a/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp +++ /dev/null @@ -1,155 +0,0 @@ -//===- AArch64GlobalsTagging.cpp - Global tagging in IR -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" - -#include <algorithm> - -using namespace llvm; - -static const Align kTagGranuleSize = Align(16); - -static bool shouldTagGlobal(const GlobalVariable &G) { - // For now, don't instrument constant data, as it'll be in .rodata anyway. It - // may be worth instrumenting these in future to stop them from being used as - // gadgets. - if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant()) - return false; - - // Globals can be placed implicitly or explicitly in sections. There's two - // diff erent types of globals that meet this criteria that cause problems: - // 1. Function pointers that are going into various init arrays (either - // explicitly through `__attribute__((section(<foo>)))` or implicitly - // through `__attribute__((constructor)))`, such as ".(pre)init(_array)", - // ".fini(_array)", ".ctors", and ".dtors". These function pointers end up - // overaligned and overpadded, making iterating over them problematic, and - // each function pointer is individually tagged (so the iteration over - // them causes SIGSEGV/MTE[AS]ERR). - // 2. Global variables put into an explicit section, where the section's name - // is a valid C-style identifier. The linker emits a `__start_<name>` and - // `__stop_<name>` symbol for the section, so that you can iterate over - // globals within this section. Unfortunately, again, these globals would - // be tagged and so iteration causes SIGSEGV/MTE[AS]ERR. - // - // To mitigate both these cases, and because specifying a section is rare - // outside of these two cases, disable MTE protection for globals in any - // section. - if (G.hasSection()) - return false; - - return true; -} - -// Technically, due to ELF symbol interposition semantics, we can't change the -// alignment or size of symbols. If we increase the alignment or size of a -// symbol, the compiler may make optimisations based on this new alignment or -// size. If the symbol is interposed, this optimisation could lead to -// alignment-related or OOB read/write crashes. -// -// This is handled in the linker. When the linker sees multiple declarations of -// a global variable, and some are tagged, and some are untagged, it resolves it -// to be an untagged definition - but preserves the tag-granule-rounded size and -// tag-granule-alignment. This should prevent these kind of crashes intra-DSO. -// For cross-DSO, it's been a reasonable contract that if you're interposing a -// sanitizer-instrumented global, then the interposer also needs to be -// sanitizer-instrumented. -// -// FIXME: In theory, this can be fixed by splitting the size/alignment of -// globals into two uses: an "output alignment" that's emitted to the ELF file, -// and an "optimisation alignment" that's used for optimisation. Thus, we could -// adjust the output alignment only, and still optimise based on the pessimistic -// pre-tagging size/alignment. -static void tagGlobalDefinition(Module &M, GlobalVariable *G) { - Constant *Initializer = G->getInitializer(); - uint64_t SizeInBytes = - M.getDataLayout().getTypeAllocSize(Initializer->getType()); - - uint64_t NewSize = alignTo(SizeInBytes, kTagGranuleSize); - if (SizeInBytes != NewSize) { - // Pad the initializer out to the next multiple of 16 bytes. - llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0); - Constant *Padding = ConstantDataArray::get(M.getContext(), Init); - Initializer = ConstantStruct::getAnon({Initializer, Padding}); - auto *NewGV = new GlobalVariable( - M, Initializer->getType(), G->isConstant(), G->getLinkage(), - Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace()); - NewGV->copyAttributesFrom(G); - NewGV->setComdat(G->getComdat()); - NewGV->copyMetadata(G, 0); - - NewGV->takeName(G); - G->replaceAllUsesWith(NewGV); - G->eraseFromParent(); - G = NewGV; - } - - G->setAlignment(std::max(G->getAlign().valueOrOne(), kTagGranuleSize)); - - // Ensure that tagged globals don't get merged by ICF - as they should have - // diff erent tags at runtime. - G->setUnnamedAddr(GlobalValue::UnnamedAddr::None); -} - -namespace { -class AArch64GlobalsTagging : public ModulePass { -public: - static char ID; - - explicit AArch64GlobalsTagging() : ModulePass(ID) { - initializeAArch64GlobalsTaggingPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override; - - StringRef getPassName() const override { return "AArch64 Globals Tagging"; } -}; -} // anonymous namespace - -char AArch64GlobalsTagging::ID = 0; - -bool AArch64GlobalsTagging::runOnModule(Module &M) { - // No mutating the globals in-place, or iterator invalidation occurs. - SmallVector<GlobalVariable *> GlobalsToTag; - for (GlobalVariable &G : M.globals()) { - if (G.isDeclaration() || !G.isTagged()) - continue; - - assert(G.hasSanitizerMetadata() && - "Missing sanitizer metadata, but symbol is apparently tagged."); - if (!shouldTagGlobal(G)) { - GlobalValue::SanitizerMetadata Meta = G.getSanitizerMetadata(); - Meta.Memtag = false; - G.setSanitizerMetadata(Meta); - assert(!G.isTagged()); - } - GlobalsToTag.push_back(&G); - } - - for (GlobalVariable *G : GlobalsToTag) { - tagGlobalDefinition(M, G); - } - - return true; -} - -INITIALIZE_PASS_BEGIN(AArch64GlobalsTagging, "aarch64-globals-tagging", - "AArch64 Globals Tagging Pass", false, false) -INITIALIZE_PASS_END(AArch64GlobalsTagging, "aarch64-globals-tagging", - "AArch64 Globals Tagging Pass", false, false) - -ModulePass *llvm::createAArch64GlobalsTaggingPass() { - return new AArch64GlobalsTagging(); -} diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 8297f8405845e9..07f072446081a3 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -268,7 +268,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() { initializeAArch64StackTaggingPreRAPass(*PR); initializeAArch64LowerHomogeneousPrologEpilogPass(*PR); initializeAArch64DAGToDAGISelLegacyPass(*PR); - initializeAArch64GlobalsTaggingPass(*PR); } void AArch64TargetMachine::reset() { SubtargetMap.clear(); } @@ -642,7 +641,6 @@ void AArch64PassConfig::addIRPasses() { if (getOptLevel() == CodeGenOptLevel::Aggressive && EnableSelectOpt) addPass(createSelectOptimizePass()); - addPass(createAArch64GlobalsTaggingPass()); addPass(createAArch64StackTaggingPass( /*IsOptNone=*/TM->getOptLevel() == CodeGenOptLevel::None)); diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt index da13db8e68b0e6..2300e479bc1106 100644 --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -57,7 +57,6 @@ add_llvm_target(AArch64CodeGen AArch64FastISel.cpp AArch64A53Fix835769.cpp AArch64FrameLowering.cpp - AArch64GlobalsTagging.cpp AArch64CompressJumpTables.cpp AArch64ConditionOptimizer.cpp AArch64RedundantCopyElimination.cpp diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index e01df9ea03e78b..0d079881cb909c 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -25,8 +25,6 @@ ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining) ; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Expand reduction intrinsics -; CHECK-NEXT: AArch64 Globals Tagging -; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Lazy Branch Probability Analysis diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 96c30c4aec0d17..b5d5e27afa17ad 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -71,7 +71,6 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Optimize selects -; CHECK-NEXT: AArch64 Globals Tagging ; CHECK-NEXT: Stack Safety Analysis ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp index 462578a34da837..19aae5edf4a220 100644 --- a/llvm/unittests/IR/VerifierTest.cpp +++ b/llvm/unittests/IR/VerifierTest.cpp @@ -12,6 +12,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -415,5 +416,26 @@ TEST(VerifierTest, GetElementPtrInst) { << Error; } +TEST(VerifierTest, DetectTaggedGlobalInSection) { + LLVMContext C; + Module M("M", C); + GlobalVariable *GV = new GlobalVariable( + Type::getInt64Ty(C), false, GlobalValue::InternalLinkage, + ConstantInt::get(Type::getInt64Ty(C), 1)); + GV->setDSOLocal(true); + GlobalValue::SanitizerMetadata MD{}; + MD.Memtag = true; + GV->setSanitizerMetadata(MD); + GV->setSection("foo"); + M.insertGlobalVariable(GV); + + std::string Error; + raw_string_ostream ErrorOS(Error); + EXPECT_TRUE(verifyModule(M, &ErrorOS)); + EXPECT_TRUE( + StringRef(Error).starts_with("tagged GlobalValue must not be in section")) + << Error; +} + } // end anonymous namespace } // end namespace llvm diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn index 57570de8813751..6ef0bc7a7d67a1 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn @@ -126,7 +126,6 @@ static_library("LLVMAArch64CodeGen") { "AArch64FalkorHWPFFix.cpp", "AArch64FastISel.cpp", "AArch64FrameLowering.cpp", - "AArch64GlobalsTagging.cpp", "AArch64ISelDAGToDAG.cpp", "AArch64ISelLowering.cpp", "AArch64InstrInfo.cpp", _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits