ikudrin created this revision.
ikudrin added reviewers: john.brawn, daltenty, jsji, simon_tatham, tmatheson, 
mstorsjo, phosek.
ikudrin added projects: LLVM, clang.
Herald added subscribers: ekilmer, inglorion.
Herald added a project: All.
ikudrin requested review of this revision.

Currently, `extract_symbols.py` uses a predefined set of tools, none of which 
can read bitcode files. This patch makes it possible to override the tool that 
is used and, for multi-stage LTO builds, passes a freshly built `llvm-nm` as 
the override. This fixes building plugins in LTO builds and, as a result, makes 
the `clang/test/Frontend/plugin-*` tests pass.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D149119

Files:
  clang/CMakeLists.txt
  llvm/utils/extract_symbols.py

Index: llvm/utils/extract_symbols.py
===================================================================
--- llvm/utils/extract_symbols.py
+++ llvm/utils/extract_symbols.py
@@ -29,8 +29,8 @@
 # as, especially on Windows, waiting for the entire output to be ready can take
 # a significant amount of time.
 
-def dumpbin_get_symbols(lib):
-    process = subprocess.Popen(['dumpbin','/symbols',lib], bufsize=1,
+def dumpbin_get_symbols(tool, lib):
+    process = subprocess.Popen([tool,'/symbols',lib], bufsize=1,
                                stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                                universal_newlines=True)
     process.stdin.close()
@@ -41,10 +41,10 @@
             yield (match.group(2), match.group(1) != "UNDEF")
     process.wait()
 
-def nm_get_symbols(lib):
+def nm_get_symbols(tool, lib):
     # -P means the output is in portable format, and -g means we only get global
     # symbols.
-    cmd = ['nm','-P','-g']
+    cmd = [tool,'-P','-g']
     if sys.platform.startswith('aix'):
         cmd += ['-Xany','-C','-p']
     process = subprocess.Popen(cmd+[lib], bufsize=1,
@@ -68,8 +68,8 @@
             yield (match.group(1), False)
     process.wait()
 
-def readobj_get_symbols(lib):
-    process = subprocess.Popen(['llvm-readobj','--symbols',lib], bufsize=1,
+def readobj_get_symbols(tool, lib):
+    process = subprocess.Popen([tool,'--symbols',lib], bufsize=1,
                                stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                                universal_newlines=True)
     process.stdin.close()
@@ -95,10 +95,10 @@
 # Define functions which determine if the target is 32-bit Windows (as that's
 # where calling convention name decoration happens).
 
-def dumpbin_is_32bit_windows(lib):
+def dumpbin_is_32bit_windows(tool, lib):
     # dumpbin /headers can output a huge amount of data (>100MB in a debug
     # build) so we read only up to the 'machine' line then close the output.
-    process = subprocess.Popen(['dumpbin','/headers',lib], bufsize=1,
+    process = subprocess.Popen([tool,'/headers',lib], bufsize=1,
                                stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                                universal_newlines=True)
     process.stdin.close()
@@ -112,8 +112,8 @@
     process.wait()
     return retval
 
-def objdump_is_32bit_windows(lib):
-    output = subprocess.check_output(['objdump','-f',lib],
+def objdump_is_32bit_windows(tool, lib):
+    output = subprocess.check_output([tool,'-f',lib],
                                      universal_newlines=True)
     for line in output.splitlines():
         match = re.match('.+file format (\S+)', line)
@@ -121,8 +121,8 @@
             return (match.group(1) == 'pe-i386')
     return False
 
-def readobj_is_32bit_windows(lib):
-    output = subprocess.check_output(['llvm-readobj','--file-header',lib],
+def readobj_is_32bit_windows(tool, lib):
+    output = subprocess.check_output([tool,'--file-header',lib],
                                      universal_newlines=True)
     for line in output.splitlines():
         match = re.match('Format: (\S+)', line)
@@ -132,7 +132,7 @@
 
 # On AIX, there isn't an easy way to detect 32-bit windows objects with the system toolchain,
 # so just assume false.
-def aix_is_32bit_windows(lib):
+def aix_is_32bit_windows(tool, lib):
     return False
 
 # MSVC mangles names to ?<identifier_mangling>@<type_mangling>. By examining the
@@ -355,10 +355,10 @@
     return components
 
 def extract_symbols(arg):
-    get_symbols, should_keep_symbol, calling_convention_decoration, lib = arg
+    get_symbols, get_symbols_tool, should_keep_symbol, calling_convention_decoration, lib = arg
     symbol_defs = dict()
     symbol_refs = set()
-    for (symbol, is_def) in get_symbols(lib):
+    for (symbol, is_def) in get_symbols(get_symbols_tool, lib):
         symbol = should_keep_symbol(symbol, calling_convention_decoration)
         if symbol:
             if is_def:
@@ -392,8 +392,20 @@
     # Not a template
     return None
 
+def parse_arg_override(parser, val):
+    tool, _, path = val.partition('=')
+    if not tool in known_tools:
+        parser.error(f'Unknown tool: {tool}')
+    if not path or not os.path.isfile(path):
+        parser.error(f'Override path for tool {tool} does not exist')
+    return (tool, path)
+
 if __name__ == '__main__':
-    tool_exes = ['dumpbin','nm','objdump','llvm-readobj']
+    known_tools = {'dumpbin': 'dumpbin',
+                   'nm': 'nm',
+                   'objdump': 'objdump',
+                   'llvm-readobj': 'llvm-readobj'}
+    tool_exes = list(known_tools)
     parser = argparse.ArgumentParser(
         description='Extract symbols to export from libraries')
     parser.add_argument('--mangling', choices=['itanium','microsoft'],
@@ -401,11 +413,18 @@
     parser.add_argument('--tools', choices=tool_exes, nargs='*',
                         help='tools to use to extract symbols and determine the'
                         ' target')
+    parser.add_argument('--override', action='append', metavar='<tool>=<path>',
+                        type=lambda x: parse_arg_override(parser, x),
+                        help='explicitly specify <path> to run <tool>')
     parser.add_argument('libs', metavar='lib', type=str, nargs='+',
                         help='libraries to extract symbols from')
     parser.add_argument('-o', metavar='file', type=str, help='output to file')
     args = parser.parse_args()
 
+    if args.override:
+        for (tool, path) in args.override:
+            known_tools[tool] = path
+
     # Determine the function to use to get the list of symbols from the inputs,
     # and the function to use to determine if the target is 32-bit windows.
     tools = { 'dumpbin' : (dumpbin_get_symbols, dumpbin_is_32bit_windows),
@@ -413,18 +432,20 @@
               'objdump' : (None, objdump_is_32bit_windows),
               'llvm-readobj' : (readobj_get_symbols, readobj_is_32bit_windows) }
     get_symbols = None
+    get_symbols_tool = None
     is_32bit_windows = aix_is_32bit_windows if sys.platform.startswith('aix') else None
+    is_32bit_windows_tool = None
     # If we have a tools argument then use that for the list of tools to check
     if args.tools:
         tool_exes = args.tools
     # Find a tool to use by trying each in turn until we find one that exists
     # (subprocess.call will throw OSError when the program does not exist)
-    get_symbols = None
     for exe in tool_exes:
         try:
+            tool = known_tools[exe]
             # Close std streams as we don't want any output and we don't
             # want the process to wait for something on stdin.
-            p = subprocess.Popen([exe], stdout=subprocess.PIPE,
+            p = subprocess.Popen([tool], stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  stdin=subprocess.PIPE,
                                  universal_newlines=True)
@@ -436,8 +457,10 @@
             # is_32bit_windows
             if not get_symbols:
                 get_symbols = tools[exe][0]
+                get_symbols_tool = tool
             if not is_32bit_windows:
                 is_32bit_windows = tools[exe][1]
+                is_32bit_windows_tool = tool
             if get_symbols and is_32bit_windows:
                 break
         except OSError:
@@ -478,7 +501,7 @@
 
     # Check if calling convention decoration is used by inspecting the first
     # library in the list
-    calling_convention_decoration = is_32bit_windows(libs[0])
+    calling_convention_decoration = is_32bit_windows(is_32bit_windows_tool, libs[0])
 
     # Extract symbols from libraries in parallel. This is a huge time saver when
     # doing a debug build, as there are hundreds of thousands of symbols in each
@@ -489,7 +512,8 @@
         # use a lambda or local function definition as that doesn't work on
         # windows, so create a list of tuples which duplicates the arguments
         # that are the same in all calls.
-        vals = [(get_symbols, should_keep_symbol, calling_convention_decoration, x) for x in libs]
+        vals = [(get_symbols, get_symbols_tool, should_keep_symbol,
+                 calling_convention_decoration, x) for x in libs]
         # Do an async map then wait for the result to make sure that
         # KeyboardInterrupt gets caught correctly (see
         # http://bugs.python.org/issue8296)
Index: clang/CMakeLists.txt
===================================================================
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -616,6 +616,12 @@
     elseif(MSVC)
       add_dependencies(clang-bootstrap-deps llvm-lib)
       set(${CLANG_STAGE}_AR -DCMAKE_AR=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-lib)
+      # 'extract_symbols.py' uses utilities like 'dumpbin' and 'llvm-readobj'
+      # which can be found via PATH. They cannot read bitcode files, resulting
+      # in missing symbols for plugins if the compiler is built with LTO. To fix
+      # this, build 'llvm-nm' and instruct 'extract_symbols.py' to use it.
+      add_dependencies(clang-bootstrap-deps llvm-nm)
+      set(BOOTSTRAP_LLVM_EXTRACT_SYMBOLS_FLAGS --tools nm dumpbin --override nm=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-nm.exe)
     elseif(NOT WIN32)
       add_dependencies(clang-bootstrap-deps llvm-ar llvm-ranlib)
       if(NOT BOOTSTRAP_LLVM_ENABLE_LLD AND LLVM_BINUTILS_INCDIR)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to