Author: Jez Ng Date: 2020-12-20T13:49:18-05:00 New Revision: 5f9896d3b23c31beecf225d90194e1bf4e097677
URL: https://github.com/llvm/llvm-project/commit/5f9896d3b23c31beecf225d90194e1bf4e097677 DIFF: https://github.com/llvm/llvm-project/commit/5f9896d3b23c31beecf225d90194e1bf4e097677.diff LOG: [lld-macho] Support Obj-C symbols in order files Obj-C symbols may have spaces and colons, which our previous order file parser would be confused by. The order file format has made the very unfortunate choice of using colons for its delimiters, which means that we have to use heuristics to determine if a given colon is part of a symbol or not... Reviewed By: #lld-macho, thakis Differential Revision: https://reviews.llvm.org/D93567 Added: Modified: lld/MachO/Driver.cpp lld/test/MachO/order-file.s Removed: lld/test/MachO/invalid/order-file-bad-arch.test lld/test/MachO/invalid/order-file-bad-objfile.test ################################################################################ diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 63d101270cf5..9838e67cd4a2 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -393,19 +393,15 @@ static void addFileList(StringRef path) { addFile(path, false); } -static std::array<StringRef, 6> archNames{"arm", "arm64", "i386", - "x86_64", "ppc", "ppc64"}; -static bool isArchString(StringRef s) { - static DenseSet<StringRef> archNamesSet(archNames.begin(), archNames.end()); - return archNamesSet.find(s) != archNamesSet.end(); -} - // An order file has one entry per line, in the following format: // -// <arch>:<object file>:<symbol name> +// <cpu>:<object file>:<symbol name> // -// <arch> and <object file> are optional. If not specified, then that entry -// matches any symbol of that name. +// <cpu> and <object file> are optional. If not specified, then that entry +// matches any symbol of that name. Parsing this format is not quite +// straightforward because the symbol name itself can contain colons, so when +// encountering a colon, we consider the preceding characters to decide if it +// can be a valid CPU type or file path. // // If a symbol is matched by multiple entries, then it takes the lowest-ordered // entry (the one nearest to the front of the list.) @@ -420,59 +416,37 @@ static void parseOrderFile(StringRef path) { MemoryBufferRef mbref = *buffer; size_t priority = std::numeric_limits<size_t>::max(); - for (StringRef rest : args::getLines(mbref)) { - StringRef arch, objectFile, symbol; - - std::array<StringRef, 3> fields; - uint8_t fieldCount = 0; - while (rest != "" && fieldCount < 3) { - std::pair<StringRef, StringRef> p = getToken(rest, ": \t\n\v\f\r"); - StringRef tok = p.first; - rest = p.second; - - // Check if we have a comment - if (tok == "" || tok[0] == '#') - break; - - fields[fieldCount++] = tok; - } - - switch (fieldCount) { - case 3: - arch = fields[0]; - objectFile = fields[1]; - symbol = fields[2]; - break; - case 2: - (isArchString(fields[0]) ? arch : objectFile) = fields[0]; - symbol = fields[1]; - break; - case 1: - symbol = fields[0]; - break; - case 0: - break; - default: - llvm_unreachable("too many fields in order file"); - } + for (StringRef line : args::getLines(mbref)) { + StringRef objectFile, symbol; + line = line.take_until([](char c) { return c == '#'; }); // ignore comments + line = line.ltrim(); + + CPUType cpuType = StringSwitch<CPUType>(line) + .StartsWith("i386:", CPU_TYPE_I386) + .StartsWith("x86_64:", CPU_TYPE_X86_64) + .StartsWith("arm:", CPU_TYPE_ARM) + .StartsWith("arm64:", CPU_TYPE_ARM64) + .StartsWith("ppc:", CPU_TYPE_POWERPC) + .StartsWith("ppc64:", CPU_TYPE_POWERPC64) + .Default(CPU_TYPE_ANY); + // Drop the CPU type as well as the colon + if (cpuType != CPU_TYPE_ANY) + line = line.drop_until([](char c) { return c == ':'; }).drop_front(); + // TODO: Update when we extend support for other CPUs + if (cpuType != CPU_TYPE_ANY && cpuType != CPU_TYPE_X86_64) + continue; - if (!arch.empty()) { - if (!isArchString(arch)) { - error("invalid arch \"" + arch + "\" in order file: expected one of " + - llvm::join(archNames, ", ")); - continue; + constexpr std::array<StringRef, 2> fileEnds = {".o:", ".o):"}; + for (StringRef fileEnd : fileEnds) { + size_t pos = line.find(fileEnd); + if (pos != StringRef::npos) { + // Split the string around the colon + objectFile = line.take_front(pos + fileEnd.size() - 1); + line = line.drop_front(pos + fileEnd.size()); + break; } - - // TODO: Update when we extend support for other archs - if (arch != "x86_64") - continue; - } - - if (!objectFile.empty() && !objectFile.endswith(".o")) { - error("invalid object file name \"" + objectFile + - "\" in order file: should end with .o"); - continue; } + symbol = line.trim(); if (!symbol.empty()) { SymbolPriorityEntry &entry = config->priorities[symbol]; diff --git a/lld/test/MachO/invalid/order-file-bad-arch.test b/lld/test/MachO/invalid/order-file-bad-arch.test deleted file mode 100644 index 06b37b017f0b..000000000000 --- a/lld/test/MachO/invalid/order-file-bad-arch.test +++ /dev/null @@ -1,9 +0,0 @@ -# REQUIRES: x86 -# RUN: echo ".globl _main; .text; _main: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t.o -# RUN: not %lld -o %t %t.o -order_file %s 2>&1 | FileCheck %s -# CHECK: error: invalid arch "sparc" in order file: expected one of arm, arm64, i386, x86_64, ppc, ppc64 -# CHECK-EMPTY: - -_barsymbol -sparc:hello.o:_foosymbol -i386:hello.o:_foosymbol diff --git a/lld/test/MachO/invalid/order-file-bad-objfile.test b/lld/test/MachO/invalid/order-file-bad-objfile.test deleted file mode 100644 index 546e68832f13..000000000000 --- a/lld/test/MachO/invalid/order-file-bad-objfile.test +++ /dev/null @@ -1,10 +0,0 @@ -# REQUIRES: x86 -# RUN: echo ".globl _main; .text; _main: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t.o -# RUN: not %lld -o %t %t.o -order_file %s 2>&1 | FileCheck %s -# CHECK: invalid object file name "helloo" in order file: should end with .o -# CHECK: invalid object file name "z80" in order file: should end with .o -# CHECK-EMPTY: - -_barsymbol -x86_64:helloo:_foosymbol -z80:_foosymbol diff --git a/lld/test/MachO/order-file.s b/lld/test/MachO/order-file.s index 673343935d3f..a13abe73efb9 100644 --- a/lld/test/MachO/order-file.s +++ b/lld/test/MachO/order-file.s @@ -5,11 +5,11 @@ # RUN: rm -f %t/foo.a # RUN: llvm-ar rcs %t/foo.a %t/foo.o -# FOO-FIRST: <_foo>: +# FOO-FIRST: <_bar>: # FOO-FIRST: <_main>: # FOO-SECOND: <_main>: -# FOO-SECOND: <_foo>: +# FOO-SECOND: <_bar>: # RUN: %lld -lSystem -o %t/test-1 %t/test.o %t/foo.o -order_file %t/ord-1 # RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST @@ -83,9 +83,9 @@ # RUN: %lld -lSystem -o %t/test-4 %t/foo.o %t/test.o -order_file %t/ord-multiple-4 # RUN: llvm-objdump -d %t/test-4 | FileCheck %s --check-prefix=FOO-FIRST -## _foo and _bar both point to the same location. When both symbols appear in -## an order file, the location in question should be ordered according to the -## lowest-ordered symbol that references it. +## -[Foo doFoo:andBar:] and _bar both point to the same location. When both +## symbols appear in an order file, the location in question should be ordered +## according to the lowest-ordered symbol that references it. # RUN: %lld -lSystem -o %t/test-alias %t/test.o %t/foo.o -order_file %t/ord-alias # RUN: llvm-objdump -d %t/test-alias | FileCheck %s --check-prefix=FOO-FIRST @@ -93,63 +93,63 @@ # RUN: llvm-objdump -d %t/test-alias | FileCheck %s --check-prefix=FOO-FIRST #--- ord-1 -_foo # just a comment +-[Foo doFoo:andBar:] # just a comment _main # another comment #--- ord-2 _main # just a comment -_foo # another comment +-[Foo doFoo:andBar:] # another comment #--- ord-file-match -foo.o:_foo +foo.o:-[Foo doFoo:andBar:] _main #--- ord-file-nomatch -bar.o:_foo +bar.o:-[Foo doFoo:andBar:] _main -_foo +-[Foo doFoo:andBar:] #--- ord-arch-match -x86_64:_foo +x86_64:-[Foo doFoo:andBar:] _main #--- ord-arch-nomatch -ppc:_foo +ppc:-[Foo doFoo:andBar:] _main -_foo +-[Foo doFoo:andBar:] #--- ord-arch-file-match -x86_64:bar.o:_foo +x86_64:bar.o:-[Foo doFoo:andBar:] _main #--- ord-multiple-1 -_foo +-[Foo doFoo:andBar:] _main -foo.o:_foo +foo.o:-[Foo doFoo:andBar:] #--- ord-multiple-2 -foo.o:_foo +foo.o:-[Foo doFoo:andBar:] _main -_foo +-[Foo doFoo:andBar:] #--- ord-multiple-3 -_foo +-[Foo doFoo:andBar:] _main -_foo +-[Foo doFoo:andBar:] #--- ord-multiple-4 -foo.o:_foo +foo.o:-[Foo doFoo:andBar:] _main -foo.o:_foo +foo.o:-[Foo doFoo:andBar:] #--- ord-alias _bar _main -_foo +-[Foo doFoo:andBar:] #--- foo.s -.globl _foo -_foo: +.globl "-[Foo doFoo:andBar:]" +"-[Foo doFoo:andBar:]": _bar: ret @@ -157,5 +157,5 @@ _bar: .globl _main _main: - callq _foo + callq "-[Foo doFoo:andBar:]" ret _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits