tejohnson created this revision. tejohnson added a reviewer: pcc. Herald added subscribers: dexonsmith, steven_wu, inglorion, mehdi_amini.
Currently, -flto-unit is specified whenever LTO options are used (unless using the old LTO API). This causes vtable defs to be processed using regular LTO, which is needed for CFI and whole program vtable optimizations, since they need to modify the vtables in a whole program manner. However, this causes non-negligible overhead due to the regular LTO processing. Since this isn't needed when not using CFI or -fwhole-program-vtables, only enable -flto-unit in those cases. Otherwise all ThinLTO compiles pay the overhead, even when not needed. Repository: rC Clang https://reviews.llvm.org/D53524 Files: include/clang/Driver/SanitizerArgs.h lib/Driver/SanitizerArgs.cpp lib/Driver/ToolChains/Clang.cpp test/Driver/lto-unit.c Index: test/Driver/lto-unit.c =================================================================== --- test/Driver/lto-unit.c +++ test/Driver/lto-unit.c @@ -1,9 +1,9 @@ -// RUN: %clang -target x86_64-unknown-linux -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-unknown-linux -### %s -flto=thin 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=NOUNIT %s -// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=NOUNIT %s +// RUN: %clang -target x86_64-unknown-linux -### %s -flto=full -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-unknown-linux -### %s -flto=thin -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=full -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=thin -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=NOUNIT %s +// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=full -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=thin -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=NOUNIT %s // UNIT: "-flto-unit" // NOUNIT-NOT: "-flto-unit" Index: lib/Driver/ToolChains/Clang.cpp =================================================================== --- lib/Driver/ToolChains/Clang.cpp +++ lib/Driver/ToolChains/Clang.cpp @@ -3364,21 +3364,6 @@ // the use-list order, since serialization to bitcode is part of the flow. if (JA.getType() == types::TY_LLVM_BC) CmdArgs.push_back("-emit-llvm-uselists"); - - // Device-side jobs do not support LTO. - bool isDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) || - JA.isDeviceOffloading(Action::OFK_Host)); - - if (D.isUsingLTO() && !isDeviceOffloadAction) { - Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ); - - // The Darwin and PS4 linkers currently use the legacy LTO API, which - // does not support LTO unit features (CFI, whole program vtable opt) - // under ThinLTO. - if (!(RawTriple.isOSDarwin() || RawTriple.isPS4()) || - D.getLTOMode() == LTOK_Full) - CmdArgs.push_back("-flto-unit"); - } } if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) { @@ -4980,6 +4965,25 @@ CmdArgs.push_back("-fwhole-program-vtables"); } + // Device-side jobs do not support LTO. + bool isDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) || + JA.isDeviceOffloading(Action::OFK_Host)); + + if (D.isUsingLTO() && + (isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) && + !isDeviceOffloadAction) { + Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ); + + // Enable LTO unit if need for CFI or whole program vtable optimization. + // The Darwin and PS4 linkers currently use the legacy LTO API, which + // does not support LTO unit features (CFI, whole program vtable opt) + // under ThinLTO. + bool SupportsLTOUnit = !(RawTriple.isOSDarwin() || RawTriple.isPS4()) || + D.getLTOMode() == LTOK_Full; + if ((WholeProgramVTables || Sanitize.needsLTO()) && SupportsLTOUnit) + CmdArgs.push_back("-flto-unit"); + } + if (Arg *A = Args.getLastArg(options::OPT_fexperimental_isel, options::OPT_fno_experimental_isel)) { CmdArgs.push_back("-mllvm"); Index: lib/Driver/SanitizerArgs.cpp =================================================================== --- lib/Driver/SanitizerArgs.cpp +++ lib/Driver/SanitizerArgs.cpp @@ -207,6 +207,10 @@ return Sanitizers.Mask & NeedsUnwindTables; } +bool SanitizerArgs::needsLTO() const { + return Sanitizers.Mask & NeedsLTO; +} + SanitizerArgs::SanitizerArgs(const ToolChain &TC, const llvm::opt::ArgList &Args) { SanitizerMask AllRemove = 0; // During the loop below, the accumulated set of Index: include/clang/Driver/SanitizerArgs.h =================================================================== --- include/clang/Driver/SanitizerArgs.h +++ include/clang/Driver/SanitizerArgs.h @@ -78,6 +78,7 @@ bool requiresPIE() const; bool needsUnwindTables() const; + bool needsLTO() const; bool linkCXXRuntimes() const { return LinkCXXRuntimes; } bool hasCrossDsoCfi() const { return CfiCrossDso; } void addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
Index: test/Driver/lto-unit.c =================================================================== --- test/Driver/lto-unit.c +++ test/Driver/lto-unit.c @@ -1,9 +1,9 @@ -// RUN: %clang -target x86_64-unknown-linux -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-unknown-linux -### %s -flto=thin 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=NOUNIT %s -// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=NOUNIT %s +// RUN: %clang -target x86_64-unknown-linux -### %s -flto=full -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-unknown-linux -### %s -flto=thin -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=full -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=thin -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=NOUNIT %s +// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=full -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=thin -fwhole-program-vtables 2>&1 | FileCheck --check-prefix=NOUNIT %s // UNIT: "-flto-unit" // NOUNIT-NOT: "-flto-unit" Index: lib/Driver/ToolChains/Clang.cpp =================================================================== --- lib/Driver/ToolChains/Clang.cpp +++ lib/Driver/ToolChains/Clang.cpp @@ -3364,21 +3364,6 @@ // the use-list order, since serialization to bitcode is part of the flow. if (JA.getType() == types::TY_LLVM_BC) CmdArgs.push_back("-emit-llvm-uselists"); - - // Device-side jobs do not support LTO. - bool isDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) || - JA.isDeviceOffloading(Action::OFK_Host)); - - if (D.isUsingLTO() && !isDeviceOffloadAction) { - Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ); - - // The Darwin and PS4 linkers currently use the legacy LTO API, which - // does not support LTO unit features (CFI, whole program vtable opt) - // under ThinLTO. - if (!(RawTriple.isOSDarwin() || RawTriple.isPS4()) || - D.getLTOMode() == LTOK_Full) - CmdArgs.push_back("-flto-unit"); - } } if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) { @@ -4980,6 +4965,25 @@ CmdArgs.push_back("-fwhole-program-vtables"); } + // Device-side jobs do not support LTO. + bool isDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) || + JA.isDeviceOffloading(Action::OFK_Host)); + + if (D.isUsingLTO() && + (isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) && + !isDeviceOffloadAction) { + Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ); + + // Enable LTO unit if need for CFI or whole program vtable optimization. + // The Darwin and PS4 linkers currently use the legacy LTO API, which + // does not support LTO unit features (CFI, whole program vtable opt) + // under ThinLTO. + bool SupportsLTOUnit = !(RawTriple.isOSDarwin() || RawTriple.isPS4()) || + D.getLTOMode() == LTOK_Full; + if ((WholeProgramVTables || Sanitize.needsLTO()) && SupportsLTOUnit) + CmdArgs.push_back("-flto-unit"); + } + if (Arg *A = Args.getLastArg(options::OPT_fexperimental_isel, options::OPT_fno_experimental_isel)) { CmdArgs.push_back("-mllvm"); Index: lib/Driver/SanitizerArgs.cpp =================================================================== --- lib/Driver/SanitizerArgs.cpp +++ lib/Driver/SanitizerArgs.cpp @@ -207,6 +207,10 @@ return Sanitizers.Mask & NeedsUnwindTables; } +bool SanitizerArgs::needsLTO() const { + return Sanitizers.Mask & NeedsLTO; +} + SanitizerArgs::SanitizerArgs(const ToolChain &TC, const llvm::opt::ArgList &Args) { SanitizerMask AllRemove = 0; // During the loop below, the accumulated set of Index: include/clang/Driver/SanitizerArgs.h =================================================================== --- include/clang/Driver/SanitizerArgs.h +++ include/clang/Driver/SanitizerArgs.h @@ -78,6 +78,7 @@ bool requiresPIE() const; bool needsUnwindTables() const; + bool needsLTO() const; bool linkCXXRuntimes() const { return LinkCXXRuntimes; } bool hasCrossDsoCfi() const { return CfiCrossDso; } void addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits