ikudrin created this revision. ikudrin added reviewers: john.brawn, daltenty, jsji, simon_tatham, tmatheson, mstorsjo, phosek. ikudrin added projects: LLVM, clang. Herald added subscribers: ekilmer, inglorion. Herald added a project: All. ikudrin requested review of this revision.
Currently, `extract_symbols.py` uses a predefined set of tools, none of which can read bitcode files. This patch makes it possible to override the path of the tool being used and, for multi-stage LTO builds, passes a freshly built `llvm-nm` as that override. This fixes building plugins in LTO builds and, as a result, makes the `clang/test/Frontend/plugin-*` tests pass. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D149119 Files: clang/CMakeLists.txt llvm/utils/extract_symbols.py
Index: llvm/utils/extract_symbols.py =================================================================== --- llvm/utils/extract_symbols.py +++ llvm/utils/extract_symbols.py @@ -29,8 +29,8 @@ # as, especially on Windows, waiting for the entire output to be ready can take # a significant amount of time. -def dumpbin_get_symbols(lib): - process = subprocess.Popen(['dumpbin','/symbols',lib], bufsize=1, +def dumpbin_get_symbols(tool, lib): + process = subprocess.Popen([tool,'/symbols',lib], bufsize=1, stdout=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) process.stdin.close() @@ -41,10 +41,10 @@ yield (match.group(2), match.group(1) != "UNDEF") process.wait() -def nm_get_symbols(lib): +def nm_get_symbols(tool, lib): # -P means the output is in portable format, and -g means we only get global # symbols. - cmd = ['nm','-P','-g'] + cmd = [tool,'-P','-g'] if sys.platform.startswith('aix'): cmd += ['-Xany','-C','-p'] process = subprocess.Popen(cmd+[lib], bufsize=1, @@ -68,8 +68,8 @@ yield (match.group(1), False) process.wait() -def readobj_get_symbols(lib): - process = subprocess.Popen(['llvm-readobj','--symbols',lib], bufsize=1, +def readobj_get_symbols(tool, lib): + process = subprocess.Popen([tool,'--symbols',lib], bufsize=1, stdout=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) process.stdin.close() @@ -95,10 +95,10 @@ # Define functions which determine if the target is 32-bit Windows (as that's # where calling convention name decoration happens). -def dumpbin_is_32bit_windows(lib): +def dumpbin_is_32bit_windows(tool, lib): # dumpbin /headers can output a huge amount of data (>100MB in a debug # build) so we read only up to the 'machine' line then close the output. 
- process = subprocess.Popen(['dumpbin','/headers',lib], bufsize=1, + process = subprocess.Popen([tool,'/headers',lib], bufsize=1, stdout=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) process.stdin.close() @@ -112,8 +112,8 @@ process.wait() return retval -def objdump_is_32bit_windows(lib): - output = subprocess.check_output(['objdump','-f',lib], +def objdump_is_32bit_windows(tool, lib): + output = subprocess.check_output([tool,'-f',lib], universal_newlines=True) for line in output.splitlines(): match = re.match('.+file format (\S+)', line) @@ -121,8 +121,8 @@ return (match.group(1) == 'pe-i386') return False -def readobj_is_32bit_windows(lib): - output = subprocess.check_output(['llvm-readobj','--file-header',lib], +def readobj_is_32bit_windows(tool, lib): + output = subprocess.check_output([tool,'--file-header',lib], universal_newlines=True) for line in output.splitlines(): match = re.match('Format: (\S+)', line) @@ -132,7 +132,7 @@ # On AIX, there isn't an easy way to detect 32-bit windows objects with the system toolchain, # so just assume false. -def aix_is_32bit_windows(lib): +def aix_is_32bit_windows(tool, lib): return False # MSVC mangles names to ?<identifier_mangling>@<type_mangling>. 
By examining the @@ -355,10 +355,10 @@ return components def extract_symbols(arg): - get_symbols, should_keep_symbol, calling_convention_decoration, lib = arg + get_symbols, get_symbols_tool, should_keep_symbol, calling_convention_decoration, lib = arg symbol_defs = dict() symbol_refs = set() - for (symbol, is_def) in get_symbols(lib): + for (symbol, is_def) in get_symbols(get_symbols_tool, lib): symbol = should_keep_symbol(symbol, calling_convention_decoration) if symbol: if is_def: @@ -392,8 +392,20 @@ # Not a template return None +def parse_arg_override(parser, val): + tool, _, path = val.partition('=') + if not tool in known_tools: + parser.error(f'Unknown tool: {tool}') + if not path or not os.path.isfile(path): + parser.error(f'Override path for tool {tool} does not exist') + return (tool, path) + if __name__ == '__main__': - tool_exes = ['dumpbin','nm','objdump','llvm-readobj'] + known_tools = {'dumpbin': 'dumpbin', + 'nm': 'nm', + 'objdump': 'objdump', + 'llvm-readobj': 'llvm-readobj'} + tool_exes = list(known_tools) parser = argparse.ArgumentParser( description='Extract symbols to export from libraries') parser.add_argument('--mangling', choices=['itanium','microsoft'], @@ -401,11 +413,18 @@ parser.add_argument('--tools', choices=tool_exes, nargs='*', help='tools to use to extract symbols and determine the' ' target') + parser.add_argument('--override', action='append', metavar='<tool>=<path>', + type=lambda x: parse_arg_override(parser, x), + help='explicitly specify <path> to run <tool>') parser.add_argument('libs', metavar='lib', type=str, nargs='+', help='libraries to extract symbols from') parser.add_argument('-o', metavar='file', type=str, help='output to file') args = parser.parse_args() + if args.override: + for (tool, path) in args.override: + known_tools[tool] = path + # Determine the function to use to get the list of symbols from the inputs, # and the function to use to determine if the target is 32-bit windows. 
tools = { 'dumpbin' : (dumpbin_get_symbols, dumpbin_is_32bit_windows), @@ -413,18 +432,20 @@ 'objdump' : (None, objdump_is_32bit_windows), 'llvm-readobj' : (readobj_get_symbols, readobj_is_32bit_windows) } get_symbols = None + get_symbols_tool = None is_32bit_windows = aix_is_32bit_windows if sys.platform.startswith('aix') else None + is_32bit_windows_tool = None # If we have a tools argument then use that for the list of tools to check if args.tools: tool_exes = args.tools # Find a tool to use by trying each in turn until we find one that exists # (subprocess.call will throw OSError when the program does not exist) - get_symbols = None for exe in tool_exes: try: + tool = known_tools[exe] # Close std streams as we don't want any output and we don't # want the process to wait for something on stdin. - p = subprocess.Popen([exe], stdout=subprocess.PIPE, + p = subprocess.Popen([tool], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) @@ -436,8 +457,10 @@ # is_32bit_windows if not get_symbols: get_symbols = tools[exe][0] + get_symbols_tool = tool if not is_32bit_windows: is_32bit_windows = tools[exe][1] + is_32bit_windows_tool = tool if get_symbols and is_32bit_windows: break except OSError: @@ -478,7 +501,7 @@ # Check if calling convention decoration is used by inspecting the first # library in the list - calling_convention_decoration = is_32bit_windows(libs[0]) + calling_convention_decoration = is_32bit_windows(is_32bit_windows_tool, libs[0]) # Extract symbols from libraries in parallel. This is a huge time saver when # doing a debug build, as there are hundreds of thousands of symbols in each @@ -489,7 +512,8 @@ # use a lambda or local function definition as that doesn't work on # windows, so create a list of tuples which duplicates the arguments # that are the same in all calls. 
- vals = [(get_symbols, should_keep_symbol, calling_convention_decoration, x) for x in libs] + vals = [(get_symbols, get_symbols_tool, should_keep_symbol, + calling_convention_decoration, x) for x in libs] # Do an async map then wait for the result to make sure that # KeyboardInterrupt gets caught correctly (see # http://bugs.python.org/issue8296) Index: clang/CMakeLists.txt =================================================================== --- clang/CMakeLists.txt +++ clang/CMakeLists.txt @@ -616,6 +616,12 @@ elseif(MSVC) add_dependencies(clang-bootstrap-deps llvm-lib) set(${CLANG_STAGE}_AR -DCMAKE_AR=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-lib) + # 'extract_symbols.py' uses utilities like 'dumpbin' and 'llvm-readobj' + # which can be found via PATH. They cannot read bitcode files, resulting + # in missing symbols for plugins if the compiler is built with LTO. To fix + # this, build 'llvm-nm' and instruct 'extract_symbols.py' to use it. + add_dependencies(clang-bootstrap-deps llvm-nm) + set(BOOTSTRAP_LLVM_EXTRACT_SYMBOLS_FLAGS --tools nm dumpbin --override nm=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-nm.exe) elseif(NOT WIN32) add_dependencies(clang-bootstrap-deps llvm-ar llvm-ranlib) if(NOT BOOTSTRAP_LLVM_ENABLE_LLD AND LLVM_BINUTILS_INCDIR)
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits