https://github.com/clayborg updated https://github.com/llvm/llvm-project/pull/164471
>From c6ef435f628b964949b72cd6b78199493be9dfa9 Mon Sep 17 00:00:00 2001 From: Greg Clayton <[email protected]> Date: Tue, 21 Oct 2025 11:19:30 -0700 Subject: [PATCH 1/6] Enable LLDB to load large dSYM files. llvm-dsymutil can produce mach-o files where some sections in __DWARF exceed the 4GB barrier and subsequent sections in the dSYM will be inaccessible because the mach-o section_64 structure only has a 32 bit file offset. This patch enables LLDB to load a large dSYM file by figuring out when this happens and properly adjusting the file offset of the LLDB sections. I was unable to add a test as obj2yaml and yaml2obj are broken for mach-o files and they can't convert a yaml file back into a valid mach-o object file. Any suggestions for adding a test would be appreciated. --- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 9cdb8467bfc60..6878f7331e0f5 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1674,6 +1674,10 @@ void ObjectFileMachO::ProcessSegmentCommand( uint32_t segment_sect_idx; const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1; + // dSYM files can create sections whose data exceeds the 4GB barrier, but + // mach-o sections only have 32 bit offsets. So keep track of when we + // overflow and fix the sections offsets as we iterate. + uint64_t section_offset_adjust = 0; const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8; for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects; ++segment_sect_idx) { @@ -1697,6 +1701,14 @@ void ObjectFileMachO::ProcessSegmentCommand( // isn't stored in the abstracted Sections. m_mach_sections.push_back(sect64); + // Make sure we can load dSYM files whose __DWARF sections exceed the 4GB + // barrier. 
llvm::MachO::section_64 have only 32 bit file offsets for the + // section contents. + const uint64_t section_file_offset = sect64.offset + section_offset_adjust; + // If this section overflows a 4GB barrier, then we need to adjust any + // subsequent the section offsets. + if (is_dsym && ((uint64_t)sect64.offset + sect64.size) >= UINT32_MAX) + section_offset_adjust += 0x100000000ull; if (add_section) { ConstString section_name( sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname))); @@ -1736,13 +1748,13 @@ void ObjectFileMachO::ProcessSegmentCommand( } // Grow the section size as needed. - if (sect64.offset) { + if (section_file_offset) { const lldb::addr_t segment_min_file_offset = segment->GetFileOffset(); const lldb::addr_t segment_max_file_offset = segment_min_file_offset + segment->GetFileSize(); - const lldb::addr_t section_min_file_offset = sect64.offset; + const lldb::addr_t section_min_file_offset = section_file_offset; const lldb::addr_t section_max_file_offset = section_min_file_offset + sect64.size; const lldb::addr_t new_file_offset = @@ -1770,9 +1782,9 @@ void ObjectFileMachO::ProcessSegmentCommand( sect64.addr, // File VM address == addresses as they are // found in the object file sect64.size, // VM size in bytes of this section - sect64.offset, // Offset to the data for this section in + section_file_offset, // Offset to the data for this section in // the file - sect64.offset ? sect64.size : 0, // Size in bytes of + section_file_offset ? sect64.size : 0, // Size in bytes of // this section as // found in the file sect64.align, @@ -1792,14 +1804,14 @@ void ObjectFileMachO::ProcessSegmentCommand( SectionSP section_sp(new Section( segment_sp, module_sp, this, ++context.NextSectionIdx, section_name, sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size, - sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align, - sect64.flags)); + section_file_offset, section_file_offset == 0 ? 
0 : sect64.size, + sect64.align, sect64.flags)); // Set the section to be encrypted to match the segment bool section_is_encrypted = false; if (!segment_is_encrypted && load_cmd.filesize != 0) section_is_encrypted = context.EncryptedRanges.FindEntryThatContains( - sect64.offset) != nullptr; + section_file_offset) != nullptr; section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted); section_sp->SetPermissions(segment_permissions); >From 4abf6b799bf6c0d5ee04e21a029fac167a2fb67e Mon Sep 17 00:00:00 2001 From: Greg Clayton <[email protected]> Date: Fri, 24 Oct 2025 11:40:58 -0700 Subject: [PATCH 2/6] Address review comments. - Fix a case where a section can be larger than 4GB - Fix comments to be a bit more clear - Don't only do this for dSYM files --- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 6878f7331e0f5..1040b58d767b6 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1674,9 +1674,9 @@ void ObjectFileMachO::ProcessSegmentCommand( uint32_t segment_sect_idx; const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1; - // dSYM files can create sections whose data exceeds the 4GB barrier, but - // mach-o sections only have 32 bit offsets. So keep track of when we - // overflow and fix the sections offsets as we iterate. + // 64 bit mach-o files have sections with 32 bit file offsets. If any section + // data end will exceed UINT32_MAX, then we need to do some bookkeeping to + // ensure we can access this data correctly. uint64_t section_offset_adjust = 0; const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 
7 : 8; for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects; @@ -1701,14 +1701,15 @@ void ObjectFileMachO::ProcessSegmentCommand( // isn't stored in the abstracted Sections. m_mach_sections.push_back(sect64); - // Make sure we can load dSYM files whose __DWARF sections exceed the 4GB - // barrier. llvm::MachO::section_64 have only 32 bit file offsets for the - // section contents. + // Make sure we can load sections in mach-o files where some sections cross + // a 4GB boundary. llvm::MachO::section_64 have only 32 bit file offsets + // for the file offset of the section contents, so we need to track and + // sections that overflow and adjust the offsets accordingly. const uint64_t section_file_offset = sect64.offset + section_offset_adjust; - // If this section overflows a 4GB barrier, then we need to adjust any - // subsequent the section offsets. - if (is_dsym && ((uint64_t)sect64.offset + sect64.size) >= UINT32_MAX) - section_offset_adjust += 0x100000000ull; + const uint64_t end_section_offset = (uint64_t)sect64.offset + sect64.size; + if (end_section_offset >= UINT32_MAX) + section_offset_adjust += end_section_offset & 0xFFFFFFFF00000000ull; + if (add_section) { ConstString section_name( sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname))); >From 63ab43a95e887fec7aaae45d4b990fe129070e5c Mon Sep 17 00:00:00 2001 From: Greg Clayton <[email protected]> Date: Fri, 24 Oct 2025 11:45:13 -0700 Subject: [PATCH 3/6] Run clang format. --- lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 1040b58d767b6..caf2d66315748 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1782,7 +1782,7 @@ void ObjectFileMachO::ProcessSegmentCommand( // other sections. 
sect64.addr, // File VM address == addresses as they are // found in the object file - sect64.size, // VM size in bytes of this section + sect64.size, // VM size in bytes of this section section_file_offset, // Offset to the data for this section in // the file section_file_offset ? sect64.size : 0, // Size in bytes of >From dd738a54b4809ce6219b6f9a388e12b0930099b2 Mon Sep 17 00:00:00 2001 From: Greg Clayton <[email protected]> Date: Fri, 24 Oct 2025 15:10:54 -0700 Subject: [PATCH 4/6] Cast to uint64_t to be clear what we are doing. --- lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index caf2d66315748..29285facaa475 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1705,7 +1705,7 @@ void ObjectFileMachO::ProcessSegmentCommand( // a 4GB boundary. llvm::MachO::section_64 have only 32 bit file offsets // for the file offset of the section contents, so we need to track and // sections that overflow and adjust the offsets accordingly. - const uint64_t section_file_offset = sect64.offset + section_offset_adjust; + const uint64_t section_file_offset = (uint64_t)sect64.offset + section_offset_adjust; const uint64_t end_section_offset = (uint64_t)sect64.offset + sect64.size; if (end_section_offset >= UINT32_MAX) section_offset_adjust += end_section_offset & 0xFFFFFFFF00000000ull; >From 78723b8431fd903db25909ddc73823839794c398 Mon Sep 17 00:00:00 2001 From: Greg Clayton <[email protected]> Date: Fri, 24 Oct 2025 15:11:51 -0700 Subject: [PATCH 5/6] clang format. 
--- lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 29285facaa475..c8e520d687f67 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1705,7 +1705,8 @@ void ObjectFileMachO::ProcessSegmentCommand( // a 4GB boundary. llvm::MachO::section_64 have only 32 bit file offsets // for the file offset of the section contents, so we need to track and // sections that overflow and adjust the offsets accordingly. - const uint64_t section_file_offset = (uint64_t)sect64.offset + section_offset_adjust; + const uint64_t section_file_offset = + (uint64_t)sect64.offset + section_offset_adjust; const uint64_t end_section_offset = (uint64_t)sect64.offset + sect64.size; if (end_section_offset >= UINT32_MAX) section_offset_adjust += end_section_offset & 0xFFFFFFFF00000000ull; >From 67645d8e41f3a8704089dd6e8dc9766db0a2d2d4 Mon Sep 17 00:00:00 2001 From: Greg Clayton <[email protected]> Date: Fri, 24 Oct 2025 15:58:02 -0700 Subject: [PATCH 6/6] Add a test. 
The binary is as minimal as possible and it contains 1 segment named "__DWARF" with 3 sections: FILE OFF INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 RESERVED3 NAME =========== ===== ------------------ ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------------------- 0x00000068: [ 1] 0x00000000fffffff0 0x0000000000000020 0xfffffff0 0x00000002 0x00000000 0x00000000 0x00000001 0x00000000 0x00000000 0x00000000 __DWARF.__debug_abbrev 0x000000b8: [ 2] 0x0000000100000010 0x0000000200000000 0x00000010 0x00000002 0x00000000 0x00000000 0x00000001 0x00000000 0x00000000 0x00000000 __DWARF.__debug_info 0x00000108: [ 3] 0x0000000300000010 0x0000000000000020 0x00000010 0x00000002 0x00000000 0x00000000 0x00000001 0x00000000 0x00000000 0x00000000 __DWARF.__debug_line The file offsets should be parsed correctly by LLDB as: __debug_abbrev file_offset=0x00000000fffffff0 __debug_info file_offset=0x0000000100000010 __debug_line file_offset=0x0000000300000010 --- .../MachO/Inputs/section-overflow-binary | Bin 0 -> 344 bytes .../ObjectFile/MachO/section-overflow-binary.test | 13 +++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary create mode 100644 lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test diff --git a/lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary b/lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary new file mode 100644 index 0000000000000000000000000000000000000000..19dc2f4ac9ffe55c414b1f37817099f07846ad00 GIT binary patch literal 344 zcmX^A>+L@t1_nk3Am9RG5W@mUv@$3FSqu!4Kn&u?$Ge0(2DyR7K*Wdt|Np}{Oh|kO zs31F#W&>hoC_g?vB{iuuJw7ohsVKD!w|NRs^&s;>=E4BTUC01tUS?ieK1dwheF8u+ em>!ThAU-kX<z(ii0+nJl59AjGkR%WgG7kV(1ucvK literal 0 HcmV?d00001 diff --git a/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test b/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test new 
file mode 100644 index 0000000000000..76c335f65a76a --- /dev/null +++ b/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test @@ -0,0 +1,13 @@ +RUN: %lldb -b %p/Inputs/section-overflow-binary \ +RUN: -o 'script dwarf = lldb.target.module[0].sections[0]' \ +RUN: -o 'script section = dwarf.GetSubSectionAtIndex(0)' \ +RUN: -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \ +RUN: -o 'script section = dwarf.GetSubSectionAtIndex(1)' \ +RUN: -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \ +RUN: -o 'script section = dwarf.GetSubSectionAtIndex(2)' \ +RUN: -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \ +RUN: | FileCheck %s + +CHECK: __debug_abbrev file_offset=0x00000000fffffff0 +CHECK: __debug_info file_offset=0x0000000100000010 +CHECK: __debug_line file_offset=0x0000000300000010 _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
