Author: serge-sans-paille
Date: 2025-03-02T20:21:44Z
New Revision: 527af302b90eaf686959dfe569dceadd8e58d611
URL: https://github.com/llvm/llvm-project/commit/527af302b90eaf686959dfe569dceadd8e58d611 DIFF: https://github.com/llvm/llvm-project/commit/527af302b90eaf686959dfe569dceadd8e58d611.diff LOG: Add support for dynamic libraries in CLANG_BOLT (#127020) Added: Modified: clang/cmake/caches/BOLT.cmake clang/tools/driver/CMakeLists.txt clang/utils/perf-training/perf-helper.py Removed: ################################################################################ diff --git a/clang/cmake/caches/BOLT.cmake b/clang/cmake/caches/BOLT.cmake index eba2346b2f4ca..1956c10463148 100644 --- a/clang/cmake/caches/BOLT.cmake +++ b/clang/cmake/caches/BOLT.cmake @@ -1,6 +1,7 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "") set(CLANG_BOLT "INSTRUMENT" CACHE STRING "") set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") +set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "") set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt index 5d7962769014a..10ea5de387220 100644 --- a/clang/tools/driver/CMakeLists.txt +++ b/clang/tools/driver/CMakeLists.txt @@ -168,6 +168,28 @@ if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED) ) set(LIT_COMMAND "${lit_base_dir}/${lit_file_name}") + set(CLANG_BOLT_INPUTS $<TARGET_FILE:clang>) + set(CLANG_INSTRUMENTED_OUTPUTS ${CLANG_INSTRUMENTED}) + + # Add in dynamically linked libraries, if needs be. Currently only supported + # on Linux because it relies on LD_PRELOAD for instrumentation. 
+ if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + if (CLANG_LINK_CLANG_DYLIB) + set(CLANG_CPP_BOLT_INSTRUMENTED "clang-cxx-bolt.inst" CACHE STRING + "Name of BOLT-instrumented Clang library") + set(CLANG_CPP_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${CLANG_CPP_BOLT_INSTRUMENTED}) + list(APPEND CLANG_BOLT_INPUTS $<TARGET_FILE:clang-cpp>) + list(APPEND CLANG_INSTRUMENTED_OUTPUTS ${CLANG_CPP_INSTRUMENTED}) + endif() + if (LLVM_LINK_LLVM_DYLIB) + set(LLVM_BOLT_INSTRUMENTED "LLVM-bolt.inst" CACHE STRING + "Name of BOLT-instrumented LLVM library") + set(LLVM_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${LLVM_BOLT_INSTRUMENTED}) + list(APPEND CLANG_BOLT_INPUTS $<TARGET_FILE:LLVM>) + list(APPEND CLANG_INSTRUMENTED_OUTPUTS ${LLVM_INSTRUMENTED}) + endif() + endif() + # This POST_BUILD command is executed unconditionally even if the clang target # is already built. We need to wrap the whole bolt optimization process in # a single python wrapper, so that we can first check if the binary has @@ -176,15 +198,15 @@ if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED) TARGET clang POST_BUILD COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/perf-training/perf-helper.py bolt-optimize - --method ${CLANG_BOLT} - --input $<TARGET_FILE:clang> - --instrumented-output ${CLANG_INSTRUMENTED} - --fdata ${BOLT_FDATA} - --perf-training-binary-dir ${PERF_TRAINING_BINARY_DIR} - --readelf $<TARGET_FILE:llvm-readobj> - --bolt $<TARGET_FILE:llvm-bolt> - --lit "${LIT_COMMAND}" - --merge-fdata $<TARGET_FILE:merge-fdata> + --method ${CLANG_BOLT} + --input "${CLANG_BOLT_INPUTS}" + --instrumented-output "${CLANG_INSTRUMENTED_OUTPUTS}" + --fdata ${BOLT_FDATA} + --perf-training-binary-dir ${PERF_TRAINING_BINARY_DIR} + --readelf $<TARGET_FILE:llvm-readobj> + --bolt $<TARGET_FILE:llvm-bolt> + --lit "${LIT_COMMAND}" + --merge-fdata $<TARGET_FILE:merge-fdata> COMMENT "Optimizing Clang with BOLT" USES_TERMINAL VERBATIM diff --git a/clang/utils/perf-training/perf-helper.py 
b/clang/utils/perf-training/perf-helper.py index 55c5160a71c4f..cdb6c39f6c50e 100644 --- a/clang/utils/perf-training/perf-helper.py +++ b/clang/utils/perf-training/perf-helper.py @@ -560,6 +560,23 @@ def genOrderFile(args): return 0 +def filter_bolt_optimized(inputs, instrumented_outputs): + new_inputs = [] + new_instrumented_ouputs = [] + for input, instrumented_output in zip(inputs, instrumented_outputs): + output = subprocess.check_output( + [opts.readelf, "-WS", input], universal_newlines=True + ) + + # This binary has already been bolt-optimized, so skip further processing. + if re.search("\\.bolt\\.org\\.text", output, re.MULTILINE): + print(f"Skipping {input}, it's already instrumented") + else: + new_inputs.append(input) + new_instrumented_ouputs.append(instrumented_output) + return new_inputs, new_instrumented_ouputs + + def bolt_optimize(args): parser = argparse.ArgumentParser("%prog [options] ") parser.add_argument("--method", choices=["INSTRUMENT", "PERF", "LBR"]) @@ -574,47 +591,67 @@ def bolt_optimize(args): opts = parser.parse_args(args) - output = subprocess.check_output( - [opts.readelf, "-WS", opts.input], universal_newlines=True - ) + inputs = opts.input.split(";") + instrumented_outputs = opts.instrumented_output.split(";") + assert len(inputs) == len( + instrumented_outputs + ), "inconsistent --input / --instrumented-output arguments" - # This binary has already been bolt-optimized, so skip further processing. 
- if re.search("\\.bolt\\.org\\.text", output, re.MULTILINE): + inputs, instrumented_outputs = filter_bolt_optimized(inputs, instrumented_outputs) + if not inputs: return 0 + environ = os.environ.copy() if opts.method == "INSTRUMENT": - process = subprocess.run( - [ + preloads = [] + for input, instrumented_output in zip(inputs, instrumented_outputs): + args = [ opts.bolt, - opts.input, + input, "-o", - opts.instrumented_output, + instrumented_output, "-instrument", "--instrumentation-file-append-pid", f"--instrumentation-file={opts.fdata}", - ], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) + ] + print("Running: " + " ".join(args)) + process = subprocess.run( + args, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) - print(process.args) - for line in process.stdout: - sys.stdout.write(line) - process.check_returncode() + for line in process.stdout: + sys.stdout.write(line) + process.check_returncode() + output = subprocess.check_output( + [opts.readelf, "--file-header", input], universal_newlines=True + ) + if re.search(r"Type:\s*((Shared)|(DYN))", output): + # force using the instrumented version + preloads.append(instrumented_output) + + if preloads: + print("Patching execution environment for dynamic library") + environ["LD_PRELOAD"] = os.pathsep.join(preloads) + + args = [ + sys.executable, + opts.lit, + "-v", + os.path.join(opts.perf_training_binary_dir, f"bolt-fdata"), + ] + print("Running: " + " ".join(args)) process = subprocess.run( - [ - sys.executable, - opts.lit, - os.path.join(opts.perf_training_binary_dir, "bolt-fdata"), - ], + args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, + env=environ, ) - print(process.args) for line in process.stdout: sys.stdout.write(line) process.check_returncode() @@ -624,14 +661,14 @@ def bolt_optimize(args): merge_fdata([opts.merge_fdata, opts.fdata, opts.perf_training_binary_dir]) - shutil.copy(opts.input, f"{opts.input}-prebolt") + for input in inputs: + 
shutil.copy(input, f"{input}-prebolt") - process = subprocess.run( - [ + args = [ opts.bolt, - f"{opts.input}-prebolt", + f"{input}-prebolt", "-o", - opts.input, + input, "-data", opts.fdata, "-reorder-blocks=ext-tsp", @@ -643,16 +680,18 @@ def bolt_optimize(args): "-use-gnu-stack", "-update-debug-sections", "-nl" if opts.method == "PERF" else "", - ], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) + ] + print("Running: " + " ".join(args)) + process = subprocess.run( + args, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) - print(process.args) - for line in process.stdout: - sys.stdout.write(line) - process.check_returncode() + for line in process.stdout: + sys.stdout.write(line) + process.check_returncode() commands = { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits