Introduce a new eclass with utility functions for handling LTO bytecode (or internal representation, IR) inside static archives (.a files).
Static libraries when built with LTO will contain LTO bytecode which is not portable across compiler versions or compiler vendors. To avoid pessimising the library and always filtering LTO, we can build it with -ffat-lto-objects instead, which builds some components twice. The installed part will then have the LTO contents stripped out, leaving the regular objects in the static archive. It's not feasible to make these work otherwise, as we'd need tracking for whether a library was built by a specific compiler and its version, and that compatibility can vary based on other factors (e.g. with gcc, sys-devel/gcc[zstd] controls if it supports zstd compression for LTO). We also discourage static libraries anyway. Provide two functions: * lto-guarantee-fat If LTO is currently enabled (as determined by `tc-is-lto`, added in 2aea6c3ff2181ad96187e456a3307609fd288d4c), add `-ffat-lto-objects` to CFLAGS and CXXFLAGS if supported. This guarantees that produced archives are "fat" (contain both IR and regular object files) for later pruning. * strip-lto-bytecode Process a given static archive (.a file) and remove its IR component, leaving a regular object. This approach is also taken by Fedora, openSUSE, and Debian/Ubuntu. An honourable mention to `lto-rebuild` which fulfilled the same task for many in the LTO overlay too. We did consider an alternative approach where we'd relink objects using the driver in src_install (or some hook afterwards), but this would be more brittle, as we'd need to extract the right arguments to use (see e.g. the recent Wireshark issues in fad8ff8a45afc83559f8df695cf96dfec51d3e8a for how this can be subtle) and not PM-agnostic given we don't have portable hooks right now (and even if we did, suspect they wouldn't work in a way that facilitated this). It's also not clear if such an approach would've worked for Clang. All of this wasn't worth pursuing until H. J. 
Lu's patches for binutils landed, which they have now in binutils-2.44 [0],
which made bfd's handling of mixed objects much more robust.

[0] https://inbox.sourceware.org/binutils/20250112220244.597636-1-hjl.to...@gmail.com/

Bug: https://bugs.gentoo.org/926120
Thanks-to: Arsen Arsenović <ar...@gentoo.org>
Co-authored-by: Eli Schwartz <eschwa...@gentoo.org>
Signed-off-by: Sam James <s...@gentoo.org>
---
 eclass/dot-a.eclass   | 124 +++++++++++++++++
 eclass/tests/dot-a.sh | 314 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 438 insertions(+)
 create mode 100644 eclass/dot-a.eclass
 create mode 100755 eclass/tests/dot-a.sh

diff --git a/eclass/dot-a.eclass b/eclass/dot-a.eclass
new file mode 100644
index 0000000000000..20a0fa1dfc206
--- /dev/null
+++ b/eclass/dot-a.eclass
@@ -0,0 +1,124 @@
+# Copyright 2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+# @ECLASS: dot-a.eclass
+# @MAINTAINER:
+# Toolchain
+# Toolchain Ninjas <toolch...@gentoo.org>
+# @AUTHOR:
+# Sam James <s...@gentoo.org>
+# Eli Schwartz <eschwa...@gentoo.org>
+# @SUPPORTED_EAPIS: 8
+# @BLURB: Functions to handle stripping LTO bytecode out of static archives.
+# @DESCRIPTION:
+# This eclass provides functions to strip LTO bytecode out of static archives
+# (.a files).
+#
+# Static libraries when built with LTO will contain LTO bytecode which is
+# not portable across compiler versions or compiler vendors. To avoid pessimising
+# the library and always filtering LTO, we can build it with -ffat-lto-objects
+# instead, which builds some components twice. The installed part will then
+# have the LTO contents stripped out, leaving the regular objects in the
+# static archive.
+#
+# Usage should consist of calling lto-guarantee-fat before configure-time
+# and calling strip-lto-bytecode after installation.
+#
+# @EXAMPLE:
+# @CODE
+#
+# inherit dot-a
+#
+# src_configure() {
+#     lto-guarantee-fat
+#     econf
+# }
+#
+# src_install() {
+#     default
+#     strip-lto-bytecode
+# }
+case ${EAPI} in
+	8) ;;
+	*) die "${ECLASS}: EAPI ${EAPI:-0} not supported" ;;
+esac
+
+if [[ -z ${_DOT_A_ECLASS} ]] ; then
+_DOT_A_ECLASS=1
+
+inherit flag-o-matic toolchain-funcs
+
+# TODO: QA check
+
+# @FUNCTION: lto-guarantee-fat
+# @DESCRIPTION:
+# If LTO is enabled, appends -ffat-lto-objects or any other flags needed
+# to provide fat LTO objects.
+lto-guarantee-fat() {
+	tc-is-lto || return
+
+	# We add this for all languages as LTO obviously can't be done
+	# if different compilers are used for e.g. C vs C++ anyway.
+	append-flags $(test-flags-CC -ffat-lto-objects)
+}
+
+# @FUNCTION: strip-lto-bytecode
+# @USAGE: [library|directory] [...]
+# @DESCRIPTION:
+# Strips LTO bytecode from libraries (static archives) passed as arguments.
+# Defaults to operating on ${ED} as a whole if no arguments are passed.
+#
+# As an optimisation, if USE=static-libs exists for a package and is disabled,
+# the default-searching behaviour with no arguments is suppressed.
+strip-lto-bytecode() {
+	tc-is-lto || return
+
+	local files=()
+
+	if [[ ${#} -eq 0 ]]; then
+		if ! in_iuse static-libs || use static-libs ; then
+			# maybe we are USE=static-libs. Alternatively, maybe the ebuild doesn't
+			# offer such a choice. In both cases, the user specified the function,
+			# so we expect to be called on *something*, but nothing was explicitly
+			# passed. Try scanning ${ED} automatically.
+			set -- "${ED}"
+		fi
+	fi
+
+	# Directory arguments are searched recursively for *.a files; any
+	# other argument is taken as a file to strip as-is.
+	local arg
+	for arg in "$@" ; do
+		if [[ -d ${arg} ]] ; then
+			mapfile -t -d '' -O "${#files[@]}" files < <(find "${arg}" -type f -iname '*.a' -print0)
+		else
+			files+=( "${arg}" )
+		fi
+	done
+
+	local toolchain_type=
+	tc-is-gcc && toolchain_type=gnu
+	tc-is-clang && toolchain_type=llvm
+
+	local file
+	for file in "${files[@]}" ; do
+		case ${toolchain_type} in
+			gnu)
+				$(tc-getSTRIP) \
+					-R '.gnu.lto_*' \
+					-R '.gnu.debuglto_*' \
+					-N __gnu_lto_v1 \
+					"${file}" || die "Stripping bytecode in ${file} failed"
+				;;
+			llvm)
+				llvm-bitcode-strip \
+					-r "${file}" \
+					-o "${file}" || die "Stripping bytecode in ${file} failed"
+				;;
+			*)
+				;;
+		esac
+	done
+}
+
+fi
diff --git a/eclass/tests/dot-a.sh b/eclass/tests/dot-a.sh
new file mode 100755
index 0000000000000..5c153b3b85fba
--- /dev/null
+++ b/eclass/tests/dot-a.sh
@@ -0,0 +1,314 @@
+#!/bin/bash
+# Copyright 2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+source tests-common.sh || exit
+source version-funcs.sh || exit
+
+inherit dot-a
+
+_create_test_progs() {
+	cat <<-EOF > a.c
+	int foo();
+
+	int foo() {
+		return 42;
+	}
+	EOF
+
+	cat <<-EOF > main.c
+	#include <stdio.h>
+	int foo();
+
+	int main() {
+		printf("Got magic number: %d\n", foo());
+		return 0;
+	}
+	EOF
+}
+
+test_lto_guarantee_fat() {
+	# Check whether lto-guarantee-fat adds -ffat-lto-objects and it
+	# results in a successful link (and a failed link without it).
+	LDFLAGS="-fuse-ld=${linker}"
+
+	$(tc-getCC) ${CFLAGS} -flto a.c -o a.o -c || die
+	$(tc-getCC) ${CFLAGS} ${LDFLAGS} -flto main.c a.o -o main || die
+	if ./main | grep -q "Got magic number: 42" ; then
+		:;
+	else
+		die "Pure LTO check failed"
+	fi
+
+	tbegin "lto-guarantee-fat (CC=$(tc-getCC), linker=${linker}): check linking w/ fat LTO object w LTO"
+	ret=0
+	(
+		export CFLAGS="-O2 -flto"
+		lto-guarantee-fat
+
+		$(tc-getCC) ${CFLAGS} a.c -o a.o -c 2>/dev/null || return 1
+		$(tc-getCC) ${CFLAGS} ${LDFLAGS} main.c a.o 2>/dev/null || return 1
+	) || ret=1
+	tend ${ret} "Linking LTO executable w/ fat archive failed"
+
+	tbegin "lto-guarantee-fat (CC=$(tc-getCC), linker=${linker}): check linking w/ fat LTO object w/o LTO"
+	ret=0
+	(
+		export CFLAGS="-O2 -flto"
+		lto-guarantee-fat
+
+		# Linking here will fail if a.o isn't a fat object, as there's nothing
+		# to fall back on with -fno-lto.
+		$(tc-getCC) ${CFLAGS} a.c -o a.o -c 2>/dev/null || return 1
+		$(tc-getCC) ${CFLAGS} ${LDFLAGS} -fno-lto main.c a.o 2>/dev/null || return 1
+	) || ret=1
+	tend ${ret} "Linking non-LTO executable w/ fat archive failed"
+}
+
+test_strip_lto_bytecode() {
+	# Check whether strip-lto-bytecode does its job on a single argument, but
+	# focus of this test is more basic, not checking all possible option
+	# handling.
+	#
+	# i.e. If we use strip-lto-bytecode, does it remove the LTO bytecode
+	# and allow linking? If we use it w/o -ffat-lto-objects, do we get
+	# a failed link as we expect?
+	LDFLAGS="-fuse-ld=${linker}"
+
+	tbegin "strip-lto-bytecode (CC=$(tc-getCC), linker=${linker}): check that linking w/ stripped non-fat archive breaks"
+	ret=0
+	(
+		export CFLAGS="-O2 -flto"
+
+		# strip-lto-bytecode will error out early with LLVM,
+		# so stop the test here.
+		tc-is-clang && return 0
+
+		$(tc-getCC) ${CFLAGS} a.c -o a.o -c 2>/dev/null || return 1
+
+		# This should corrupt a.o and make linking below fail.
+		strip-lto-bytecode a.o
+
+		$(tc-getCC) ${CFLAGS} ${LDFLAGS} main.c a.o -o main 2>/dev/null && return 1
+
+		return 0
+	) || ret=1
+	tend ${ret} "Linking corrupted non-fat archive unexpectedly worked"
+
+	tbegin "strip-lto-bytecode (CC=$(tc-getCC), linker=${linker}): check that linking w/ stripped fat archive works"
+	ret=0
+	(
+		export CFLAGS="-O2 -flto"
+
+		lto-guarantee-fat
+
+		$(tc-getCC) ${CFLAGS} a.c -o a.o -c 2>/dev/null || return 1
+
+		# This should NOT corrupt a.o, so linking below should succeed.
+		strip-lto-bytecode a.o
+
+		$(tc-getCC) ${CFLAGS} ${LDFLAGS} main.c a.o -o main 2>/dev/null || return 1
+	) || ret=1
+	tend ${ret} "Linking stripped fat archive failed"
+}
+
+test_mixed_objects_after_stripping() {
+	# Check whether mixing objects from two compilers (${CC_1} and ${CC_2})
+	# fails without lto-guarantee-fat and strip-lto-bytecode and works
+	# once they're used.
+	LDFLAGS="-fuse-ld=${linker}"
+
+	tbegin "strip-lto-bytecode (CC_1=${CC_1}, CC_2=${CC_2}, linker=${linker}): check that unstripped LTO objects from ${CC_1} fail w/ ${CC_2}"
+	ret=0
+	(
+		export CFLAGS="-O2 -flto"
+
+		${CC_1} ${CFLAGS} a.c -o a.o -c 2>/dev/null || return 1
+		# Using CC_1 IR with CC_2 should fail.
+		${CC_2} ${CFLAGS} ${LDFLAGS} main.c a.o -o main 2>/dev/null && return 1
+
+		return 0
+	) || ret=1
+	tend ${ret} "Mixing unstripped objects unexpectedly worked"
+
+	tbegin "strip-lto-bytecode (CC_1=${CC_1}, CC_2=${CC_2}, linker=${linker}): check that unstripped LTO objects from ${CC_2} fail w/ ${CC_1}"
+	ret=0
+	(
+		export CFLAGS="-O2 -flto"
+
+		${CC_2} ${CFLAGS} a.c -o a.o -c 2>/dev/null || return 1
+		# Using CC_2 IR with CC_1 should fail.
+		${CC_1} ${CFLAGS} ${LDFLAGS} main.c a.o -o main 2>/dev/null && return 1
+
+		return 0
+	) || ret=1
+	tend ${ret} "Mixing unstripped objects unexpectedly worked"
+
+	tbegin "strip-lto-bytecode (CC_1=${CC_1}, CC_2=${CC_2}, linker=${linker}): check that stripped LTO objects from ${CC_1} work w/ ${CC_2}"
+	ret=0
+	(
+		export CFLAGS="-O2 -flto"
+
+		lto-guarantee-fat
+		${CC_1} ${CFLAGS} a.c -o a.o -c 2>/dev/null || return 1
+		# The object should now be "vendor-neutral" and work.
+		CC=${CC_1} strip-lto-bytecode a.o
+		${CC_2} ${CFLAGS} ${LDFLAGS} main.c a.o -o main 2>/dev/null || return 1
+	) || ret=1
+	tend ${ret} "Mixing stripped objects failed"
+
+	tbegin "strip-lto-bytecode (CC_1=${CC_1}, CC_2=${CC_2}, linker=${linker}): check that stripped LTO objects from ${CC_2} work w/ ${CC_1}"
+	ret=0
+	(
+		export CFLAGS="-O2 -flto"
+
+		lto-guarantee-fat
+		${CC_2} ${CFLAGS} a.c -o a.o -c 2>/dev/null || return 1
+		# The object should now be "vendor-neutral" and work.
+		CC=${CC_2} strip-lto-bytecode a.o
+		${CC_1} ${CFLAGS} ${LDFLAGS} main.c a.o -o main 2>/dev/null || return 1
+	) || ret=1
+	tend ${ret} "Mixing stripped objects failed"
+}
+
+_check_if_lto_object() {
+	# Adapted from tc-is-lto
+	local ret=1
+	case $(tc-get-compiler-type) in
+		clang)
+			# If LTO is used, clang will output bytecode and llvm-bcanalyzer
+			# will run successfully. Otherwise, it will output plain object
+			# file and llvm-bcanalyzer will exit with error.
+			llvm-bcanalyzer "$1" &>/dev/null && ret=0
+			;;
+		gcc)
+			[[ $($(tc-getREADELF) -S "$1") == *.gnu.lto* ]] && ret=0
+			;;
+	esac
+	return "${ret}"
+}
+
+test_search_recursion() {
+	# Test whether the argument handling and logic of strip-lto-bytecode
+	# works as expected.
+	tbegin "whether default search behaviour of \${ED} works"
+	ret=0
+	(
+		CC=gcc
+		CFLAGS="-O2 -flto"
+
+		_create_test_progs
+		lto-guarantee-fat
+		$(tc-getCC) ${CFLAGS} a.c -o a.o -c 2>/dev/null || return 1
+		ar q foo.a a.o 2>/dev/null || return 1
+
+		_check_if_lto_object "${tmpdir}/lto/foo.a" || return 1
+		# It should search ${ED} if no arguments are passed, find
+		# the LTO'd foo.a, and strip it.
+		ED="${tmpdir}/lto" strip-lto-bytecode
+		# foo.a should be a regular object here.
+		_check_if_lto_object "${tmpdir}/lto/foo.a" && return 1
+
+		return 0
+	) || ret=1
+	tend ${ret} "Unexpected LTO object found"
+
+	tbegin "whether a single file argument works"
+	ret=0
+	(
+		CC=gcc
+		CFLAGS="-O2 -flto"
+
+		_create_test_progs
+		lto-guarantee-fat
+		$(tc-getCC) ${CFLAGS} a.c -o a.o -c 2>/dev/null || return 1
+		ar q foo.a a.o 2>/dev/null || return 1
+
+		_check_if_lto_object "${tmpdir}/lto/foo.a" || return 1
+		# An explicit file argument is passed, so ${ED} is not searched;
+		# only the named foo.a should be stripped.
+		ED="${tmpdir}/lto" strip-lto-bytecode "${tmpdir}/lto/foo.a"
+		# foo.a should be a regular object here.
+		_check_if_lto_object "${tmpdir}/lto/foo.a" && return 1
+
+		return 0
+	) || ret=1
+	tend ${ret} "Unexpected LTO object found"
+
+	tbegin "whether a directory and file argument works"
+	ret=0
+	(
+		mkdir "${tmpdir}"/lto2 || die
+
+		CC=gcc
+		CFLAGS="-O2 -flto"
+
+		_create_test_progs
+		lto-guarantee-fat
+		$(tc-getCC) ${CFLAGS} "${tmpdir}"/lto/a.c -o "${tmpdir}"/lto/a.o -c 2>/dev/null || return 1
+		ar q foo.a a.o 2>/dev/null || return 1
+		ar q "${tmpdir}"/lto2/foo.a a.o 2>/dev/null || return 1
+
+		_check_if_lto_object "${tmpdir}/lto/foo.a" || return 1
+		_check_if_lto_object "${tmpdir}/lto2/foo.a" || return 1
+		# It should recurse into the directory argument to find its foo.a,
+		# and strip the explicitly named archive as well.
+		ED="${tmpdir}/lto" strip-lto-bytecode "${tmpdir}/lto" "${tmpdir}/lto2/foo.a"
+		# Both archives should be regular objects here.
+		_check_if_lto_object "${tmpdir}/lto/foo.a" && return 1
+		_check_if_lto_object "${tmpdir}/lto2/foo.a" && return 1
+
+		return 0
+	) || ret=1
+	tend ${ret} "Unexpected LTO object found"
+}
+
+_repeat_tests_with_compilers() {
+	# Call test_lto_guarantee_fat and test_strip_lto_bytecode with
+	# various compilers and linkers.
+	for CC in gcc clang ; do
+		type -P ${CC} &>/dev/null || continue
+
+		for linker in bfd lld mold gold ; do
+			# lld doesn't support GCC LTO: https://github.com/llvm/llvm-project/issues/41791
+			[[ ${CC} == gcc && ${linker} == lld ]] && continue
+			# Make sure the relevant linker is actually installed and usable.
+			LDFLAGS="-fuse-ld=${linker}" tc-ld-is-${linker} || continue
+			LDFLAGS="-fuse-ld=${linker}" test-compile 'c+ld' 'int main() { return 0; }' || continue
+
+			test_lto_guarantee_fat
+			test_strip_lto_bytecode
+		done
+	done
+}
+
+_repeat_mixed_tests_with_linkers() {
+	# Call test_mixed_objects_after_stripping with various linkers.
+	#
+	# Needs both GCC and Clang to test mixing their outputs.
+	if type -P gcc &>/dev/null && type -P clang &>/dev/null ; then
+		for linker in bfd lld mold gold ; do
+			# lld doesn't support GCC LTO: https://github.com/llvm/llvm-project/issues/41791
+			[[ ${CC} == gcc && ${linker} == lld ]] && continue
+			# Make sure the relevant linker is actually installed and usable.
+			LDFLAGS="-fuse-ld=${linker}" tc-ld-is-${linker} || continue
+			LDFLAGS="-fuse-ld=${linker}" test-compile 'c+ld' 'int main() { return 0; }' || continue
+
+			CC_1=gcc
+			CC_2=clang
+			test_mixed_objects_after_stripping
+		done
+	fi
+}
+
+mkdir -p "${tmpdir}/lto" || die
+pushd "${tmpdir}/lto" >/dev/null || die
+_create_test_progs
+_repeat_tests_with_compilers
+_repeat_mixed_tests_with_linkers
+test_search_recursion
+texit
+
+# TODO: test multiple files
-- 
2.49.0