https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/104542
>From 5a55a8f84d28a70560dec646abd35d84a3a38fe9 Mon Sep 17 00:00:00 2001 From: Fangrui Song <i...@maskray.me> Date: Thu, 15 Aug 2024 19:29:05 -0700 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?= =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5-bogner [skip ci] --- llvm/include/llvm/MC/MCAssembler.h | 1 + llvm/include/llvm/MC/MCTargetOptions.h | 2 + .../llvm/MC/MCTargetOptionsCommandFlags.h | 2 + llvm/lib/MC/MCTargetOptionsCommandFlags.cpp | 10 +++ .../MCTargetDesc/AArch64ELFStreamer.cpp | 76 ++++++++++++++++--- .../test/MC/AArch64/mapping-across-sections.s | 57 +++++++++++--- 6 files changed, 129 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index c6fa48128d1891..a68eb49fda2825 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -218,6 +218,7 @@ class MCAssembler { const_iterator begin() const { return Sections.begin(); } const_iterator end() const { return Sections.end(); } + SmallVectorImpl<const MCSymbol *> &getSymbols() { return Symbols; } iterator_range<pointee_iterator< typename SmallVector<const MCSymbol *, 0>::const_iterator>> symbols() const { diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h index 899299fd15246a..a5371b3387a13d 100644 --- a/llvm/include/llvm/MC/MCTargetOptions.h +++ b/llvm/include/llvm/MC/MCTargetOptions.h @@ -64,6 +64,8 @@ class MCTargetOptions { // Use CREL relocation format for ELF. bool Crel = false; + bool ImplicitMapSyms = false; + // If true, prefer R_X86_64_[REX_]GOTPCRELX to R_X86_64_GOTPCREL on x86-64 // ELF. bool X86RelaxRelocations = true; diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h index 9d592446f3ba77..5e82bc53f3b5ed 100644 --- a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -53,6 +53,8 @@ bool getSaveTempLabels(); bool getCrel(); +bool getImplicitMapSyms(); + bool getX86RelaxRelocations(); bool getX86Sse2Avx(); diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp index 813b1194b47cbf..1a4f7e93eeb74a 100644 --- a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp +++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp @@ -48,6 +48,7 @@ MCOPT(bool, NoDeprecatedWarn) MCOPT(bool, NoTypeCheck) MCOPT(bool, SaveTempLabels) MCOPT(bool, Crel) +MCOPT(bool, ImplicitMapSyms) MCOPT(bool, X86RelaxRelocations) MCOPT(bool, X86Sse2Avx) MCOPT(std::string, ABIName) @@ -134,6 +135,14 @@ llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() { cl::desc("Use CREL relocation format for ELF")); MCBINDOPT(Crel); + static cl::opt<bool> ImplicitMapSyms( + "implicit-mapsyms", + cl::desc("Allow mapping symbol at section beginning to be implicit, " + "lowering number of mapping symbols at the expense of some " + "portability. Recommended for projects that can build all their " + "object files using this option")); + MCBINDOPT(ImplicitMapSyms); + static cl::opt<bool> X86RelaxRelocations( "x86-relax-relocations", cl::desc( @@ -174,6 +183,7 @@ MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() { Options.MCNoTypeCheck = getNoTypeCheck(); Options.MCSaveTempLabels = getSaveTempLabels(); Options.Crel = getCrel(); + Options.ImplicitMapSyms = getImplicitMapSyms(); Options.X86RelaxRelocations = getX86RelaxRelocations(); Options.X86Sse2Avx = getX86Sse2Avx(); Options.EmitDwarfUnwind = getEmitDwarfUnwind(); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index ed670bce594ec6..667844f51f079e 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -24,14 +24,15 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FormattedStream.h" @@ -176,19 +177,29 @@ void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) { /// by MachO. Beware! class AArch64ELFStreamer : public MCELFStreamer { public: + friend AArch64TargetELFStreamer; AArch64ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter) : MCELFStreamer(Context, std::move(TAB), std::move(OW), std::move(Emitter)), - LastEMS(EMS_None) {} + LastEMS(EMS_None) { + auto *TO = getContext().getTargetOptions(); + ImplicitMapSyms = TO && TO->ImplicitMapSyms; + } void changeSection(MCSection *Section, uint32_t Subsection = 0) override { - // We have to keep track of the mapping symbol state of any sections we - // use. Each one should start off as EMS_None, which is provided as the - // default constructor by DenseMap::lookup. + // Save the mapping symbol state for potential reuse when revisiting the + // section. When ImplicitMapSyms is true, the initial state is + // EMS_A64 for text sections and EMS_Data for the others. LastMappingSymbols[getCurrentSection().first] = LastEMS; - LastEMS = LastMappingSymbols.lookup(Section); + auto It = LastMappingSymbols.find(Section); + if (It != LastMappingSymbols.end()) + LastEMS = It->second; + else if (ImplicitMapSyms) + LastEMS = Section->isText() ? EMS_A64 : EMS_Data; + else + LastEMS = EMS_None; MCELFStreamer::changeSection(Section, Subsection); } @@ -269,15 +280,15 @@ class AArch64ELFStreamer : public MCELFStreamer { LastEMS = EMS_A64; } - void emitMappingSymbol(StringRef Name) { + MCSymbol *emitMappingSymbol(StringRef Name) { auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name)); emitLabel(Symbol); - Symbol->setType(ELF::STT_NOTYPE); - Symbol->setBinding(ELF::STB_LOCAL); + return Symbol; } DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; ElfMappingSymbol LastEMS; + bool ImplicitMapSyms; }; } // end anonymous namespace @@ -299,6 +310,53 @@ void AArch64TargetELFStreamer::finish() { AArch64ELFStreamer &S = getStreamer(); MCContext &Ctx = S.getContext(); auto &Asm = S.getAssembler(); + + // If ImplicitMapSyms is specified, ensure that text sections end with + // the A64 state while non-text sections end with the data state. When + // sections are combined by the linker, the subsequent section will start with + // the right state. The ending mapping symbol is added right after the last + // symbol relative to the section. When a dumb linker combines (.text.0; .word + // 0) and (.text.1; .word 0), the ending $x of .text.0 precedes the $d of + // .text.1, even if they have the same address. + if (S.ImplicitMapSyms) { + auto &Syms = Asm.getSymbols(); + const size_t NumSyms = Syms.size(); + DenseMap<MCSection *, MCSymbol *> EndMappingSym; + for (MCSection &Sec : Asm) { + S.switchSection(&Sec); + if (S.LastEMS == (Sec.isText() ? AArch64ELFStreamer::EMS_Data + : AArch64ELFStreamer::EMS_A64)) + EndMappingSym.try_emplace( + &Sec, S.emitMappingSymbol(Sec.isText() ? "$x" : "$d")); + } + if (Syms.size() != NumSyms) { + SmallVector<const MCSymbol *, 0> NewSyms; + DenseMap<MCSection *, size_t> Cnt; + Syms.truncate(NumSyms); + for (const MCSymbol *Sym : Syms) + if (Sym->isInSection()) + ++Cnt[&Sym->getSection()]; + SmallVector<size_t, 0> Idx; + for (auto [I, Sym] : llvm::enumerate(Syms)) { + NewSyms.push_back(Sym); + MCSection *Sec = Sym->isInSection() ? &Sym->getSection() : nullptr; + if (!Sec || --Cnt[Sec]) + continue; + // `Sym` is the last symbol relative to `Sec`. Add the ending mapping + // symbol, if needed, after `Sym`. + if (auto *MapSym = EndMappingSym.lookup(Sec)) { + NewSyms.push_back(MapSym); + Idx.push_back(I); + } + } + Syms = std::move(NewSyms); + // F.second holds the number of symbols added before the FILE symbol. + // Take into account the inserted mapping symbols. + for (auto &F : S.getWriter().getFileNames()) + F.second += llvm::lower_bound(Idx, F.second) - Idx.begin(); + } + } + MCSectionELF *MemtagSec = nullptr; for (const MCSymbol &Symbol : Asm.symbols()) { const auto &Sym = cast<MCSymbolELF>(Symbol); diff --git a/llvm/test/MC/AArch64/mapping-across-sections.s b/llvm/test/MC/AArch64/mapping-across-sections.s index f453c86d45fb62..e688c770cc960d 100644 --- a/llvm/test/MC/AArch64/mapping-across-sections.s +++ b/llvm/test/MC/AArch64/mapping-across-sections.s @@ -1,5 +1,10 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj %s | llvm-objdump -t - | FileCheck %s --match-full-lines +// RUN: llvm-mc -triple=aarch64 -filetype=obj -implicit-mapsyms %s | llvm-objdump -t - | FileCheck %s --check-prefix=CHECK1 --match-full-lines +/// The test covers many state transitions. Let's use the first state and the last state to describe a section. +/// .text goes through cd -> dd -> cc -> dd. +/// .data goes through dd -> dc -> cd. +.file "0.s" .section .text1,"ax" add w0, w0, w0 @@ -12,29 +17,61 @@ add w0, w0, w0 .popsection .text -add w1, w1, w1 +.word 42 .section .text1,"ax" add w1, w1, w1 +.text +add w1, w1, w1 + +.section .data,"aw" +.word 42 +add w0, w0, w0 + .text .word 42 +## .rodata and subsequent symbols should be after the FILE symbol of "1.s". +.file "1.s" .section .rodata,"a" .word 42 add w0, w0, w0 +.section .data,"aw" +add w0, w0, w0 +.word 42 + +.text + .ident "clang" .section ".note.GNU-stack","",@progbits // CHECK: SYMBOL TABLE: -// CHECK-NEXT: 0000000000000000 l .text1 0000000000000000 $x -// CHECK-NEXT: 0000000000000000 l .text 0000000000000000 $x -// CHECK-NEXT: 0000000000000004 l .text 0000000000000000 $d -// CHECK-NEXT: 0000000000000000 l .data 0000000000000000 $d -// CHECK-NEXT: 0000000000000008 l .text 0000000000000000 $x -// CHECK-NEXT: 000000000000000c l .text 0000000000000000 $d -// CHECK-NEXT: 0000000000000000 l .rodata 0000000000000000 $d -// CHECK-NEXT: 0000000000000004 l .rodata 0000000000000000 $x -// CHECK-NEXT: 0000000000000000 l .comment 0000000000000000 $d +// CHECK-NEXT: 0000000000000000 l df *ABS* 0000000000000000 0.s +// CHECK-NEXT: 0000000000000000 l .text1 0000000000000000 $x +// CHECK-NEXT: 0000000000000000 l .text 0000000000000000 $x +// CHECK-NEXT: 0000000000000004 l .text 0000000000000000 $d +// CHECK-NEXT: 0000000000000000 l .data 0000000000000000 $d +// CHECK-NEXT: 000000000000000c l .text 0000000000000000 $x +// CHECK-NEXT: 0000000000000008 l .data 0000000000000000 $x +// CHECK-NEXT: 0000000000000010 l .text 0000000000000000 $d +// CHECK-NEXT: 0000000000000000 l df *ABS* 0000000000000000 1.s +// CHECK-NEXT: 0000000000000000 l .rodata 0000000000000000 $d +// CHECK-NEXT: 0000000000000004 l .rodata 0000000000000000 $x +// CHECK-NEXT: 0000000000000010 l .data 0000000000000000 $d +// CHECK-NEXT: 0000000000000000 l .comment 0000000000000000 $d // CHECK-NOT: {{.}} + +// CHECK1: SYMBOL TABLE: +// CHECK1-NEXT: 0000000000000000 l df *ABS* 0000000000000000 0.s +// CHECK1-NEXT: 0000000000000004 l .text 0000000000000000 $d +// CHECK1-NEXT: 000000000000000c l .text 0000000000000000 $x +// CHECK1-NEXT: 0000000000000008 l .data 0000000000000000 $x +// CHECK1-NEXT: 0000000000000010 l .text 0000000000000000 $d +// CHECK1-NEXT: 0000000000000014 l .text 0000000000000000 $x +// CHECK1-NEXT: 0000000000000000 l df *ABS* 0000000000000000 1.s +// CHECK1-NEXT: 0000000000000004 l .rodata 0000000000000000 $x +// CHECK1-NEXT: 0000000000000008 l .rodata 0000000000000000 $d +// CHECK1-NEXT: 0000000000000010 l .data 0000000000000000 $d +// CHECK1-NOT: {{.}} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits