tejohnson created this revision. tejohnson added a reviewer: snehasish. Herald added subscribers: hoy, ormris, arphaman, steven_wu, hiraditya, inglorion. Herald added a project: All. tejohnson requested review of this revision. Herald added projects: clang, LLVM. Herald added a subscriber: cfe-commits.
Adds an LTO option to indicate whether we are linking with an allocator that supports hot/cold operator new interfaces. If not, at the start of the LTO backends any existing memprof hot/cold attributes are removed from the IR, and we also remove memprof metadata so that post-LTO inlining doesn't add any new attributes. This is done by setting a new flag in the module summary index. It is important to communicate via the index to the LTO backends so distributed ThinLTO handles this correctly, as it is invoked by a separate process and the combined index is how we communicate information from the LTO link. Additionally, when the LTO option is disabled, exit early from the MemProfContextDisambiguation handling performed during LTO, as this is unnecessary. Depends on D149117 <https://reviews.llvm.org/D149117> and D149192 <https://reviews.llvm.org/D149192>. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D149215 Files: clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll llvm/include/llvm/IR/ModuleSummaryIndex.h llvm/include/llvm/LTO/LTO.h llvm/lib/Bitcode/Reader/BitcodeReader.cpp llvm/lib/IR/ModuleSummaryIndex.cpp llvm/lib/LTO/LTO.cpp llvm/lib/LTO/LTOBackend.cpp llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp llvm/test/LTO/X86/memprof-supports-hot-cold-new.ll llvm/test/ThinLTO/X86/memprof-basic.ll llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll llvm/test/ThinLTO/X86/memprof-indirectcall.ll llvm/test/ThinLTO/X86/memprof-inlined.ll llvm/test/ThinLTO/X86/memprof-inlined2.ll llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll llvm/test/Transforms/MemProfContextDisambiguation/basic.ll llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll 
llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
Index: llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll =================================================================== --- llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll +++ llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll @@ -42,7 +42,7 @@ ;; ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP Index: llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll =================================================================== --- llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll +++ llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll @@ -41,7 +41,7 @@ ;; ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ Index: llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll =================================================================== --- llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll +++ llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll @@ -51,7 +51,7 @@ ;; ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. 
\ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ Index: llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll =================================================================== --- llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll +++ llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll @@ -45,7 +45,7 @@ ;; ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \ Index: llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll =================================================================== --- llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll +++ llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll @@ -93,7 +93,7 @@ ;; ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP Index: llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll =================================================================== --- llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll +++ llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll @@ -53,7 +53,7 @@ ;; ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
-; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ Index: llvm/test/Transforms/MemProfContextDisambiguation/basic.ll =================================================================== --- llvm/test/Transforms/MemProfContextDisambiguation/basic.ll +++ llvm/test/Transforms/MemProfContextDisambiguation/basic.ll @@ -31,7 +31,7 @@ ;; ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. -; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ @@ -42,6 +42,15 @@ ;; We should have cloned bar, baz, and foo, for the cold memory allocation. ; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED +;; Check again without -supports-hot-cold-new and ensure all MIB are cold and +;; that there is no cloning. +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. 
\ +; RUN: -stats -pass-remarks=memprof-context-disambiguation \ +; RUN: %s -S 2>&1 | FileCheck %s --implicit-check-not="Callsite Context Graph" \ +; RUN: --implicit-check-not="created clone" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll =================================================================== --- /dev/null +++ llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll @@ -0,0 +1,57 @@ +;; Test that passing -supports-hot-cold-new to the LTO link allows context +;; disambiguation to proceed, and also prevents memprof metadata and attributes +;; from being removed from the LTO backend, and vice versa without passing +;; -supports-hot-cold-new. + +;; First check with -supports-hot-cold-new. +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-dump-ccg \ +; RUN: -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP +; DUMP: Callsite Context Graph: + +; RUN: llvm-dis %t.out.1.0.preopt.bc -o - | FileCheck %s --check-prefix=IR +; IR: !memprof {{.*}} !callsite +; IR: "memprof"="cold" + +;; Next check without -supports-hot-cold-new, we should not perform +;; context disambiguation, and we should strip memprof metadata and +;; attributes before optimization. 
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-dump-ccg \ +; RUN: -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \ +; RUN: --implicit-check-not "Callsite Context Graph:" + +; RUN: llvm-dis %t.out.1.0.preopt.bc -o - | FileCheck %s \ +; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \ +; RUN: --implicit-check-not "memprof"="cold" + +source_filename = "memprof-supports-hot-cold-new.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + %call2 = call ptr @_Znam(i64 0) #1 + ret i32 0 +} + +declare ptr @_Znam(i64) + +attributes #0 = { noinline optnone } +attributes #1 = { "memprof"="cold" } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} Index: llvm/test/ThinLTO/X86/memprof-inlined2.ll =================================================================== --- llvm/test/ThinLTO/X86/memprof-inlined2.ll +++ llvm/test/ThinLTO/X86/memprof-inlined2.ll @@ -44,6 +44,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_Z3barv,plx \ ; RUN: -r=%t.o,_Z3bazv,plx \ Index: llvm/test/ThinLTO/X86/memprof-inlined.ll =================================================================== --- llvm/test/ThinLTO/X86/memprof-inlined.ll +++ llvm/test/ThinLTO/X86/memprof-inlined.ll @@ -43,6 +43,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: 
-supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ ; RUN: -r=%t.o,sleep, \ @@ -64,6 +65,7 @@ ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -thinlto-distributed-indexes \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ @@ -88,24 +90,6 @@ ; RUN: --check-prefix=STATS-BE --check-prefix=STATS-DISTRIB-BE \ ; RUN: --check-prefix=REMARKS -;; Try again but with distributed ThinLTO -; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ -; RUN: -thinlto-distributed-indexes \ -; RUN: -r=%t.o,main,plx \ -; RUN: -r=%t.o,_ZdaPv, \ -; RUN: -r=%t.o,sleep, \ -; RUN: -r=%t.o,_Znam, \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \ -; RUN: -stats -pass-remarks=memprof-context-disambiguation \ -; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \ -; RUN: --check-prefix=STATS - -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT -;; We should create clones for foo and bar for the call from main to allocate -;; cold memory. 
-; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED - source_filename = "inlined.ll" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/ThinLTO/X86/memprof-indirectcall.ll =================================================================== --- llvm/test/ThinLTO/X86/memprof-indirectcall.ll +++ llvm/test/ThinLTO/X86/memprof-indirectcall.ll @@ -53,6 +53,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,sleep, \ ; RUN: -r=%t.o,_Znam, \ @@ -75,6 +76,7 @@ ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -thinlto-distributed-indexes \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ @@ -100,26 +102,6 @@ ; RUN: %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \ ; RUN: --check-prefix=STATS-BE --check-prefix=REMARKS -;; Try again but with distributed ThinLTO -; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ -; RUN: -thinlto-distributed-indexes \ -; RUN: -r=%t.o,main,plx \ -; RUN: -r=%t.o,_ZdaPv, \ -; RUN: -r=%t.o,sleep, \ -; RUN: -r=%t.o,_Znam, \ -; RUN: -r=%t.o,_ZTVN10__cxxabiv120__si_class_type_infoE, \ -; RUN: -r=%t.o,_ZTVN10__cxxabiv117__class_type_infoE, \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \ -; RUN: -stats -pass-remarks=memprof-context-disambiguation \ -; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \ -; RUN: --check-prefix=STATS - -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT -;; We should only create a single clone of foo, for the direct call -;; from main allocating cold memory. 
-; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED - source_filename = "indirectcall.ll" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll =================================================================== --- llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll +++ llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll @@ -48,6 +48,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ ; RUN: -r=%t.o,sleep, \ @@ -62,6 +63,7 @@ ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -thinlto-distributed-indexes \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ Index: llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll =================================================================== --- llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll +++ llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll @@ -95,6 +95,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_Z1Db,plx \ ; RUN: -r=%t.o,_Z1Cb,plx \ Index: llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll =================================================================== --- llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll +++ llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll @@ -55,6 +55,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ ; RUN: -r=%t.o,sleep, \ @@ -75,6 +76,7 @@ ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o 
-enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -thinlto-distributed-indexes \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ @@ -101,27 +103,6 @@ ; RUN: %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \ ; RUN: --check-prefix=STATS-BE --check-prefix=REMARKS -;; Try again but with distributed ThinLTO -; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ -; RUN: -thinlto-distributed-indexes \ -; RUN: -r=%t.o,main,plx \ -; RUN: -r=%t.o,_ZdaPv, \ -; RUN: -r=%t.o,sleep, \ -; RUN: -r=%t.o,_Znam, \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \ -; RUN: -stats -pass-remarks=memprof-context-disambiguation \ -; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \ -; RUN: --check-prefix=STATS - -; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST -;; We should clone D once for the cold allocations via C. 
-; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED - -;; Check distributed index -; RUN: llvm-dis %t.o.thinlto.bc -o - | FileCheck %s --check-prefix=DISTRIB - source_filename = "duplicate-context-ids.ll" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/ThinLTO/X86/memprof-basic.ll =================================================================== --- llvm/test/ThinLTO/X86/memprof-basic.ll +++ llvm/test/ThinLTO/X86/memprof-basic.ll @@ -33,6 +33,7 @@ ; RUN: opt -thinlto-bc %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ ; RUN: -r=%t.o,sleep, \ @@ -52,6 +53,7 @@ ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ ; RUN: -thinlto-distributed-indexes \ ; RUN: -r=%t.o,main,plx \ ; RUN: -r=%t.o,_ZdaPv, \ @@ -77,26 +79,6 @@ ; RUN: %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \ ; RUN: --check-prefix=STATS-BE --check-prefix=REMARKS -;; Try again but with distributed ThinLTO -; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ -; RUN: -thinlto-distributed-indexes \ -; RUN: -r=%t.o,main,plx \ -; RUN: -r=%t.o,_ZdaPv, \ -; RUN: -r=%t.o,sleep, \ -; RUN: -r=%t.o,_Znam, \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \ -; RUN: -stats -pass-remarks=memprof-context-disambiguation \ -; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \ -; RUN: --check-prefix=STATS - -; RUN: cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT -;; We should have cloned bar, baz, and foo, for the cold memory allocation. 
-; RUN: cat %t2.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED - -;; Check distributed index -; RUN: llvm-dis %t.o.thinlto.bc -o - | FileCheck %s --check-prefix=DISTRIB - source_filename = "memprof-basic.ll" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/LTO/X86/memprof-supports-hot-cold-new.ll =================================================================== --- /dev/null +++ llvm/test/LTO/X86/memprof-supports-hot-cold-new.ll @@ -0,0 +1,65 @@ +;; Test that passing -supports-hot-cold-new to the LTO link allows context +;; disambiguation to proceed, and also prevents memprof metadata and attributes +;; from being removed from the LTO backend, and vice versa without passing +;; -supports-hot-cold-new. + +;; Note that this tests regular LTO (with a summary) due to the module flag +;; disabling ThinLTO. + +;; First check with -supports-hot-cold-new. +; RUN: opt -module-summary %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-dump-ccg \ +; RUN: -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP +; DUMP: Callsite Context Graph: + +; RUN: llvm-dis %t.out.0.0.preopt.bc -o - | FileCheck %s --check-prefix=IR +; IR: !memprof {{.*}} !callsite +; IR: "memprof"="cold" + +;; Next check without -supports-hot-cold-new, we should not perform +;; context disambiguation, and we should strip memprof metadata and +;; attributes before optimization. 
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-dump-ccg \ +; RUN: -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --allow-empty \ +; RUN: --implicit-check-not "Callsite Context Graph:" + +; RUN: llvm-dis %t.out.0.0.preopt.bc -o - | FileCheck %s \ +; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \ +; RUN: --implicit-check-not "memprof"="cold" + +source_filename = "memprof-supports-hot-cold-new.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + %call2 = call ptr @_Znam(i64 0) #1 + ret i32 0 +} + +declare ptr @_Znam(i64) + +attributes #0 = { noinline optnone } +attributes #1 = { "memprof"="cold" } + +!llvm.module.flags = !{!6} + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} + +;; Force regular LTO even though we have a summary. +!6 = !{i32 1, !"ThinLTO", i32 0} Index: llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp =================================================================== --- llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -104,6 +104,12 @@ cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden); +// Indicate we are linking with an allocator that supports hot/cold operator +// new interfaces. 
+cl::opt<bool> SupportsHotColdNew( + "supports-hot-cold-new", cl::init(false), cl::Hidden, + cl::desc("Linking with hot/cold operator new interfaces")); + /// CRTP base for graphs built from either IR or ThinLTO summary index. /// /// The graph represents the call contexts in all memprof metadata on allocation @@ -3154,6 +3160,15 @@ return Changed; } + // TODO: If/when other types of memprof cloning are enabled beyond just for + // hot and cold, we will need to change this to individually control the + // AllocationType passed to addStackNodesForMIB during CCG construction. + // Note that we specifically check this after applying imports above, so that + // the option isn't needed to be passed to distributed ThinLTO backend + // processes. + if (!SupportsHotColdNew) + return Changed; + ModuleCallsiteContextGraph CCG(M, OREGetter); Changed = CCG.process(); @@ -3193,6 +3208,14 @@ ModuleSummaryIndex &Index, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> isPrevailing) { + // TODO: If/when other types of memprof cloning are enabled beyond just for + // hot and cold, we will need to change this to individually control the + // AllocationType passed to addStackNodesForMIB during CCG construction. + // The index was set from the option, so these should be in sync. + assert(Index.withSupportsHotColdNew() == SupportsHotColdNew); + if (!SupportsHotColdNew) + return; + IndexCallsiteContextGraph CCG(Index, isPrevailing); CCG.process(); } Index: llvm/lib/LTO/LTOBackend.cpp =================================================================== --- llvm/lib/LTO/LTOBackend.cpp +++ llvm/lib/LTO/LTOBackend.cpp @@ -503,6 +503,8 @@ std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, Mod); + updateMemProfAttributes(Mod, CombinedIndex); + if (!C.CodeGenOnly) { if (!opt(C, TM.get(), 0, Mod, /*IsThinLTO=*/false, /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr, @@ -565,6 +567,8 @@ // the module, if applicable. 
Mod.setPartialSampleProfileRatio(CombinedIndex); + updateMemProfAttributes(Mod, CombinedIndex); + updatePublicTypeTestCalls(Mod, CombinedIndex.withWholeProgramVisibility()); if (Conf.CodeGenOnly) { Index: llvm/lib/LTO/LTO.cpp =================================================================== --- llvm/lib/LTO/LTO.cpp +++ llvm/lib/LTO/LTO.cpp @@ -76,6 +76,10 @@ cl::desc("Enable global value internalization in LTO")); } +/// Indicate we are linking with an allocator that supports hot/cold operator +/// new interfaces. +extern cl::opt<bool> SupportsHotColdNew; + /// Enable MemProf context disambiguation for thin link. extern cl::opt<bool> EnableMemProfContextDisambiguation; @@ -1079,6 +1083,9 @@ return StatsFileOrErr.takeError(); std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get()); + if (SupportsHotColdNew) + ThinLTO.CombinedIndex.setWithSupportsHotColdNew(); + Error Result = runRegularLTO(AddStream); if (!Result) Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols); @@ -1089,6 +1096,37 @@ return Result; } +void lto::updateMemProfAttributes(Module &Mod, + const ModuleSummaryIndex &Index) { + if (Index.withSupportsHotColdNew()) + return; + + // The profile matcher applies hotness attributes directly for allocations, + // and those will cause us to generate calls to the hot/cold interfaces + // unconditionally. If supports-hot-cold-new was not enabled in the LTO + // link then assume we don't want these calls (e.g. not linking with + // the appropriate library, or otherwise trying to disable this behavior). + for (auto &F : Mod) { + for (auto &BB : F) { + for (auto &I : BB) { + auto *CI = dyn_cast<CallBase>(&I); + if (!CI) + continue; + if (CI->hasFnAttr("memprof")) + CI->removeFnAttr("memprof"); + // Strip off all memprof metadata as it is no longer needed. + // Importantly, this avoids the addition of new memprof attributes + // after inlining propagation. 
+ // TODO: If we support additional types of MemProf metadata beyond hot + // and cold, we will need to update the metadata based on the allocator + // APIs supported instead of completely stripping all. + CI->setMetadata(LLVMContext::MD_memprof, nullptr); + CI->setMetadata(LLVMContext::MD_callsite, nullptr); + } + } + } +} + Error LTO::runRegularLTO(AddStreamFn AddStream) { // Setup optimization remarks. auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( @@ -1142,6 +1180,8 @@ } } + updateMemProfAttributes(*RegularLTO.CombinedModule, ThinLTO.CombinedIndex); + // If allowed, upgrade public vcall visibility metadata to linkage unit // visibility before whole program devirtualization in the optimizer. updateVCallVisibilityInModule(*RegularLTO.CombinedModule, Index: llvm/lib/IR/ModuleSummaryIndex.cpp =================================================================== --- llvm/lib/IR/ModuleSummaryIndex.cpp +++ llvm/lib/IR/ModuleSummaryIndex.cpp @@ -107,11 +107,13 @@ Flags |= 0x40; if (withWholeProgramVisibility()) Flags |= 0x80; + if (withSupportsHotColdNew()) + Flags |= 0x100; return Flags; } void ModuleSummaryIndex::setFlags(uint64_t Flags) { - assert(Flags <= 0xff && "Unexpected bits in flag"); + assert(Flags <= 0x1ff && "Unexpected bits in flag"); // 1 bit: WithGlobalValueDeadStripping flag. // Set on combined index only. if (Flags & 0x1) @@ -145,6 +147,10 @@ // Set on combined index only. if (Flags & 0x80) setWithWholeProgramVisibility(); + // 1 bit: WithSupportsHotColdNew flag. + // Set on combined index only. + if (Flags & 0x100) + setWithSupportsHotColdNew(); } // Collect for the given module the list of function it defines Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -8067,7 +8067,7 @@ case bitc::FS_FLAGS: { // [flags] uint64_t Flags = Record[0]; // Scan flags. 
- assert(Flags <= 0xff && "Unexpected bits in flag"); + assert(Flags <= 0x1ff && "Unexpected bits in flag"); return Flags & 0x8; } Index: llvm/include/llvm/LTO/LTO.h =================================================================== --- llvm/include/llvm/LTO/LTO.h +++ llvm/include/llvm/LTO/LTO.h @@ -96,6 +96,11 @@ /// ordered indices to elements in the input array. std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R); +/// Updates MemProf attributes (and metadata) based on whether the index +/// has recorded that we are linking with allocation libraries containing +/// the necessary APIs for downstream transformations. +void updateMemProfAttributes(Module &Mod, const ModuleSummaryIndex &Index); + class LTO; struct SymbolResolution; class ThinBackendProc; Index: llvm/include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- llvm/include/llvm/IR/ModuleSummaryIndex.h +++ llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1305,6 +1305,9 @@ /// Indicates that summary-based synthetic entry count propagation has run bool HasSyntheticEntryCounts = false; + /// Indicates that we linked with allocator supporting hot/cold new operators. + bool WithSupportsHotColdNew = false; + /// Indicates that distributed backend should skip compilation of the /// module. 
Flag is suppose to be set by distributed ThinLTO indexing /// when it detected that the module is not needed during the final @@ -1513,6 +1516,9 @@ bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; } void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; } + bool withSupportsHotColdNew() const { return WithSupportsHotColdNew; } + void setWithSupportsHotColdNew() { WithSupportsHotColdNew = true; } + bool skipModuleByDistributedBackend() const { return SkipModuleByDistributedBackend; } Index: clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll =================================================================== --- /dev/null +++ clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll @@ -0,0 +1,70 @@ +; REQUIRES: x86-registered-target + +;; Test that passing -supports-hot-cold-new to the thin link prevents memprof +;; metadata and attributes from being removed from the distributed ThinLTO +;; backend, and vice versa without passing -supports-hot-cold-new. + +;; First check with -supports-hot-cold-new. +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -save-temps \ +; RUN: -supports-hot-cold-new \ +; RUN: -thinlto-distributed-indexes \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -o %t.out + +;; Ensure that the index file reflects the -supports-hot-cold-new, as that is +;; how the ThinLTO backend behavior is controlled. +; RUN: llvm-dis %t.out.index.bc -o - | FileCheck %s --check-prefix=CHECK-INDEX-ON +;; Flags are printed in decimal, but this corresponds to 0x161, and 0x100 is +;; the value indicating -supports-hot-cold-new was enabled. 
+; CHECK-INDEX-ON: flags: 353 + +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj + +; RUN: llvm-dis %t.s.0.preopt.bc -o - | FileCheck %s --check-prefix=CHECK-IR +; CHECK-IR: !memprof {{.*}} !callsite +; CHECK-IR: "memprof"="cold" + +;; Next check without -supports-hot-cold-new, we should not perform +;; context disambiguation, and we should strip memprof metadata and +;; attributes before optimization during the distributed backend. +; RUN: llvm-lto2 run %t.o -save-temps \ +; RUN: -thinlto-distributed-indexes \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -o %t.out + +;; Ensure that the index file reflects not having -supports-hot-cold-new. +; RUN: llvm-dis %t.out.index.bc -o - | FileCheck %s --check-prefix=CHECK-INDEX-OFF +;; Flags are printed in decimal, but this corresponds to 0x61, without 0x100 set. +; CHECK-INDEX-OFF: flags: 97 + +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj + +; RUN: llvm-dis %t.s.0.preopt.bc -o - | FileCheck %s \ +; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \ +; RUN: --implicit-check-not "memprof"="cold" + +source_filename = "thinlto-distributed-supports-hot-cold-new.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + %call1 = call ptr @_Znam(i64 0) #1 + ret i32 0 +} + +declare ptr @_Znam(i64) + +attributes #0 = { noinline optnone } +attributes #1 = { "memprof"="cold" } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 
9086428284934609951}
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits