jasonmolenda created this revision.
jasonmolenda added a project: LLDB.
Herald added subscribers: omjavaid, danielkiss, JDevlieghere, kristof.beyls.
jasonmolenda requested review of this revision.
lldb's disassembler for arm64 / aarch64 doesn't recognize the ADRP+ADD
instruction pair often used to calculate pc-relative addresses, so it doesn't
symbolicate what is being pointed to. That makes the assembly hard to
understand, and doing the computation manually is annoying. This patch adds a
bit of arm64-specific knowledge to the disassembly symbolicator: it remembers
the ADRP instruction and, when the next instruction is an ADD to the same
register, computes the address being specified. This is only done when the
target is aarch64.
The other thing I dislike most about lldb's arm64 disassembly is being fixed
separately, in the llvm aarch64 instruction printer, via
https://reviews.llvm.org/D107196 .
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D107213
Files:
lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h
lldb/test/API/functionalities/disassemble/aarch64-adrp-add/TestAArch64AdrpAdd.py
lldb/test/API/functionalities/disassemble/aarch64-adrp-add/a.out.yaml
lldb/test/API/functionalities/disassemble/aarch64-adrp-add/main.c
Index: lldb/test/API/functionalities/disassemble/aarch64-adrp-add/main.c
===================================================================
--- /dev/null
+++ lldb/test/API/functionalities/disassemble/aarch64-adrp-add/main.c
@@ -0,0 +1,5 @@
+#include <stdio.h>
+int main() {
+ puts("HI");
+ return 0;
+}
Index: lldb/test/API/functionalities/disassemble/aarch64-adrp-add/a.out.yaml
===================================================================
--- /dev/null
+++ lldb/test/API/functionalities/disassemble/aarch64-adrp-add/a.out.yaml
@@ -0,0 +1,387 @@
+--- !mach-o
+FileHeader:
+ magic: 0xFEEDFACF
+ cputype: 0x100000C
+ cpusubtype: 0x0
+ filetype: 0x2
+ ncmds: 17
+ sizeofcmds: 1384
+ flags: 0x200085
+ reserved: 0x0
+LoadCommands:
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __PAGEZERO
+ vmaddr: 0
+ vmsize: 4294967296
+ fileoff: 0
+ filesize: 0
+ maxprot: 0
+ initprot: 0
+ nsects: 0
+ flags: 0
+ - cmd: LC_SEGMENT_64
+ cmdsize: 472
+ segname: __TEXT
+ vmaddr: 4294967296
+ vmsize: 16384
+ fileoff: 0
+ filesize: 16384
+ maxprot: 5
+ initprot: 5
+ nsects: 5
+ flags: 0
+ Sections:
+ - sectname: __text
+ segname: __TEXT
+ addr: 0x100003F4C
+ size: 56
+ offset: 0x3F4C
+ align: 2
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x80000400
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: FF8300D1FD7B01A9FD43009108008052BFC31FB80000009000D03E91E80B00B906000094E80B40B9E00308AAFD7B41A9FF830091C0035FD6
+ - sectname: __stubs
+ segname: __TEXT
+ addr: 0x100003F84
+ size: 12
+ offset: 0x3F84
+ align: 2
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x80000408
+ reserved1: 0x0
+ reserved2: 0xC
+ reserved3: 0x0
+ content: 1F2003D5D003025800021FD6
+ - sectname: __stub_helper
+ segname: __TEXT
+ addr: 0x100003F90
+ size: 36
+ offset: 0x3F90
+ align: 2
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x80000400
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: D10302101F2003D5F047BFA91F2003D51003005800021FD650000018F9FFFF1700000000
+ - sectname: __cstring
+ segname: __TEXT
+ addr: 0x100003FB4
+ size: 3
+ offset: 0x3FB4
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x2
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: '484900'
+ - sectname: __unwind_info
+ segname: __TEXT
+ addr: 0x100003FB8
+ size: 72
+ offset: 0x3FB8
+ align: 2
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: 010000001C000000000000001C000000000000001C000000020000004C3F00003400000034000000853F00000000000034000000030000000C000100100001000000000000000004
+ - cmd: LC_SEGMENT_64
+ cmdsize: 152
+ segname: __DATA_CONST
+ vmaddr: 4294983680
+ vmsize: 16384
+ fileoff: 16384
+ filesize: 16384
+ maxprot: 3
+ initprot: 3
+ nsects: 1
+ flags: 16
+ Sections:
+ - sectname: __got
+ segname: __DATA_CONST
+ addr: 0x100004000
+ size: 8
+ offset: 0x4000
+ align: 3
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x6
+ reserved1: 0x1
+ reserved2: 0x0
+ reserved3: 0x0
+ content: '0000000000000000'
+ - cmd: LC_SEGMENT_64
+ cmdsize: 232
+ segname: __DATA
+ vmaddr: 4295000064
+ vmsize: 16384
+ fileoff: 32768
+ filesize: 16384
+ maxprot: 3
+ initprot: 3
+ nsects: 2
+ flags: 0
+ Sections:
+ - sectname: __la_symbol_ptr
+ segname: __DATA
+ addr: 0x100008000
+ size: 8
+ offset: 0x8000
+ align: 3
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x7
+ reserved1: 0x2
+ reserved2: 0x0
+ reserved3: 0x0
+ content: A83F000001000000
+ - sectname: __data
+ segname: __DATA
+ addr: 0x100008008
+ size: 8
+ offset: 0x8008
+ align: 3
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: '0000000000000000'
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __LINKEDIT
+ vmaddr: 4295016448
+ vmsize: 16384
+ fileoff: 49152
+ filesize: 802
+ maxprot: 1
+ initprot: 1
+ nsects: 0
+ flags: 0
+ - cmd: LC_DYLD_INFO_ONLY
+ cmdsize: 48
+ rebase_off: 49152
+ rebase_size: 8
+ bind_off: 49160
+ bind_size: 24
+ weak_bind_off: 0
+ weak_bind_size: 0
+ lazy_bind_off: 49184
+ lazy_bind_size: 16
+ export_off: 49200
+ export_size: 48
+ - cmd: LC_SYMTAB
+ cmdsize: 24
+ symoff: 49256
+ nsyms: 5
+ stroff: 49352
+ strsize: 72
+ - cmd: LC_DYSYMTAB
+ cmdsize: 80
+ ilocalsym: 0
+ nlocalsym: 1
+ iextdefsym: 1
+ nextdefsym: 2
+ iundefsym: 3
+ nundefsym: 2
+ tocoff: 0
+ ntoc: 0
+ modtaboff: 0
+ nmodtab: 0
+ extrefsymoff: 0
+ nextrefsyms: 0
+ indirectsymoff: 49336
+ nindirectsyms: 3
+ extreloff: 0
+ nextrel: 0
+ locreloff: 0
+ nlocrel: 0
+ - cmd: LC_LOAD_DYLINKER
+ cmdsize: 32
+ name: 12
+ Content: '/usr/lib/dyld'
+ ZeroPadBytes: 7
+ - cmd: LC_UUID
+ cmdsize: 24
+ uuid: 84E38FAD-F165-30FB-B627-572FEC028CC7
+ - cmd: LC_BUILD_VERSION
+ cmdsize: 32
+ platform: 1
+ minos: 720896
+ sdk: 721664
+ ntools: 1
+ Tools:
+ - tool: 3
+ version: 42599936
+ - cmd: LC_SOURCE_VERSION
+ cmdsize: 16
+ version: 0
+ - cmd: LC_MAIN
+ cmdsize: 24
+ entryoff: 16204
+ stacksize: 0
+ - cmd: LC_LOAD_DYLIB
+ cmdsize: 56
+ dylib:
+ name: 24
+ timestamp: 2
+ current_version: 84698117
+ compatibility_version: 65536
+ Content: '/usr/lib/libSystem.B.dylib'
+ ZeroPadBytes: 6
+ - cmd: LC_FUNCTION_STARTS
+ cmdsize: 16
+ dataoff: 49248
+ datasize: 8
+ - cmd: LC_DATA_IN_CODE
+ cmdsize: 16
+ dataoff: 49256
+ datasize: 0
+ - cmd: LC_CODE_SIGNATURE
+ cmdsize: 16
+ dataoff: 49424
+ datasize: 530
+LinkEditData:
+ RebaseOpcodes:
+ - Opcode: REBASE_OPCODE_SET_TYPE_IMM
+ Imm: 1
+ - Opcode: REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
+ Imm: 3
+ ExtraData: [ 0x0 ]
+ - Opcode: REBASE_OPCODE_DO_REBASE_IMM_TIMES
+ Imm: 1
+ - Opcode: REBASE_OPCODE_DONE
+ Imm: 0
+ BindOpcodes:
+ - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
+ Imm: 1
+ Symbol: ''
+ - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
+ Imm: 0
+ Symbol: dyld_stub_binder
+ - Opcode: BIND_OPCODE_SET_TYPE_IMM
+ Imm: 1
+ Symbol: ''
+ - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
+ Imm: 2
+ ULEBExtraData: [ 0x0 ]
+ Symbol: ''
+ - Opcode: BIND_OPCODE_DO_BIND
+ Imm: 0
+ Symbol: ''
+ - Opcode: BIND_OPCODE_DONE
+ Imm: 0
+ Symbol: ''
+ LazyBindOpcodes:
+ - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
+ Imm: 3
+ ULEBExtraData: [ 0x0 ]
+ Symbol: ''
+ - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
+ Imm: 1
+ Symbol: ''
+ - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
+ Imm: 0
+ Symbol: _puts
+ - Opcode: BIND_OPCODE_DO_BIND
+ Imm: 0
+ Symbol: ''
+ - Opcode: BIND_OPCODE_DONE
+ Imm: 0
+ Symbol: ''
+ - Opcode: BIND_OPCODE_DONE
+ Imm: 0
+ Symbol: ''
+ - Opcode: BIND_OPCODE_DONE
+ Imm: 0
+ Symbol: ''
+ - Opcode: BIND_OPCODE_DONE
+ Imm: 0
+ Symbol: ''
+ - Opcode: BIND_OPCODE_DONE
+ Imm: 0
+ Symbol: ''
+ ExportTrie:
+ TerminalSize: 0
+ NodeOffset: 0
+ Name: ''
+ Flags: 0x0
+ Address: 0x0
+ Other: 0x0
+ ImportName: ''
+ Children:
+ - TerminalSize: 0
+ NodeOffset: 5
+ Name: _
+ Flags: 0x0
+ Address: 0x0
+ Other: 0x0
+ ImportName: ''
+ Children:
+ - TerminalSize: 2
+ NodeOffset: 33
+ Name: _mh_execute_header
+ Flags: 0x0
+ Address: 0x0
+ Other: 0x0
+ ImportName: ''
+ - TerminalSize: 3
+ NodeOffset: 37
+ Name: main
+ Flags: 0x0
+ Address: 0x3F4C
+ Other: 0x0
+ ImportName: ''
+ NameList:
+ - n_strx: 51
+ n_type: 0xE
+ n_sect: 8
+ n_desc: 0
+ n_value: 4295000072
+ - n_strx: 2
+ n_type: 0xF
+ n_sect: 1
+ n_desc: 16
+ n_value: 4294967296
+ - n_strx: 22
+ n_type: 0xF
+ n_sect: 1
+ n_desc: 0
+ n_value: 4294983500
+ - n_strx: 28
+ n_type: 0x1
+ n_sect: 0
+ n_desc: 256
+ n_value: 0
+ - n_strx: 34
+ n_type: 0x1
+ n_sect: 0
+ n_desc: 256
+ n_value: 0
+ StringTable:
+ - ' '
+ - __mh_execute_header
+ - _main
+ - _puts
+ - dyld_stub_binder
+ - __dyld_private
+ - ''
+ - ''
+ - ''
+ - ''
+ - ''
+ - ''
+...
Index: lldb/test/API/functionalities/disassemble/aarch64-adrp-add/TestAArch64AdrpAdd.py
===================================================================
--- /dev/null
+++ lldb/test/API/functionalities/disassemble/aarch64-adrp-add/TestAArch64AdrpAdd.py
@@ -0,0 +1,41 @@
+"""
+Test that the arm64 ADRP + ADD pc-relative addressing pair is symbolicated.
+"""
+
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+class TestCase(TestBase):
+
+ mydir = TestBase.compute_mydir(__file__)
+
+ @no_debug_info_test
+ def test(self):
+ src_dir = self.getSourceDir()
+ yaml_path = os.path.join(src_dir, "a.out.yaml")
+ yaml_base, ext = os.path.splitext(yaml_path)
+ obj_path = self.getBuildArtifact("a.out")
+ self.yaml2obj(yaml_path, obj_path)
+
+ target = self.dbg.CreateTarget(obj_path)
+ self.assertTrue(target, VALID_TARGET)
+
+ mains = target.FindFunctions("main")
+ for f in mains.symbols:
+ if f.GetStartAddress().GetModule().GetFileSpec().GetFilename() == "a.out":
+ insns = f.GetInstructions(target)
+ found_hi_string = False
+
+ # The binary has an ADRP + ADD instruction pair which loads
+ # the pc-relative address of a c-string. lldb should show
+ # that c-string in the disassembly in the comment field.
+ for i in insns:
+ if "HI" in i.GetComment(target):
+ found_hi_string = True
+ if found_hi_string == False and self.TraceOn():
+ strm = lldb.SBStream()
+ insns.GetDescription(strm)
+ print('Disassembly of main(), looking for "HI" in comments:')
+ print(strm.GetData())
+ self.assertTrue(found_hi_string)
Index: lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h
===================================================================
--- lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h
+++ lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h
@@ -73,6 +73,8 @@
InstructionLLVMC *m_inst;
std::mutex m_mutex;
bool m_data_from_file;
+ lldb::addr_t m_adrp_address;
+ uint32_t m_adrp_insn;
// Since we need to make two actual MC Disassemblers for ARM (ARM & THUMB),
// and there's a bit of goo to set up and own in the MC disassembler world,
Index: lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
===================================================================
--- lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
+++ lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
@@ -1030,7 +1030,8 @@
DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
const char *flavor_string)
: Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
- m_data_from_file(false) {
+ m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS),
+ m_adrp_insn(0) {
if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
m_flavor.assign("default");
}
@@ -1310,6 +1311,40 @@
Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
Address value_so_addr;
Address pc_so_addr;
+ if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
+ target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
+ if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
+ m_adrp_address = pc;
+ m_adrp_insn = value;
+ *name = nullptr;
+ *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
+ return nullptr;
+ }
+ // If previous instruction was ADRP and this is ADD, and it's to
+ // the same register, this is pc-relative address calculation.
+ if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
+ m_adrp_address == pc - 4 &&
+ (m_adrp_insn & 0x9f000000) == 0x90000000 &&
+ (m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) {
+ // Bitmasking lifted from MachODump.cpp's SymbolizerSymbolLookUp.
+ uint32_t addxri_inst;
+ uint64_t adrp_imm, addxri_imm;
+ adrp_imm =
+ ((m_adrp_insn & 0x00ffffe0) >> 3) | ((m_adrp_insn >> 29) & 0x3);
+ if (m_adrp_insn & 0x0200000)
+ adrp_imm |= 0xfffffffffc000000LL;
+
+ addxri_inst = value;
+ addxri_imm = (addxri_inst >> 10) & 0xfff;
+ if (((addxri_inst >> 22) & 0x3) == 1)
+ addxri_imm <<= 12;
+ value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +
+ addxri_imm;
+ }
+ m_adrp_address = LLDB_INVALID_ADDRESS;
+ m_adrp_insn = 0;
+ }
+
if (m_inst->UsingFileAddress()) {
ModuleSP module_sp(m_inst->GetAddress().GetModule());
if (module_sp) {
@@ -1371,6 +1406,12 @@
}
}
+ // TODO: llvm-objdump sets the type_ptr to the
+ // LLVMDisassembler_ReferenceType_Out_* values
+ // based on where value_so_addr is pointing, with
+ // Mach-O specific augmentations in MachODump.cpp. e.g.
+ // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
+ // handles.
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
*name = nullptr;
return nullptr;
_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits