tejohnson updated this revision to Diff 251893.
tejohnson added a comment.
Includes fixes for 2-stage clang bootstrap test failures and an expanded
fix for the Chromium issue, plus new tests.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D73242/new/
https://reviews.llvm.org/D73242
Files:
llvm/include/llvm/IR/ModuleSummaryIndex.h
llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
llvm/lib/AsmParser/LLParser.cpp
llvm/lib/IR/AsmWriter.cpp
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Transforms/IPO/LowerTypeTests.cpp
llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
llvm/test/Bitcode/summary_version.ll
llvm/test/Other/new-pm-lto-defaults.ll
llvm/test/Other/new-pm-thinlto-defaults.ll
llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll
llvm/test/ThinLTO/X86/cfi-unsat.ll
llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
llvm/test/ThinLTO/X86/type_test_noindircall.ll
llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll
llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll
llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll
llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll
llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll
llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll
llvm/test/Transforms/WholeProgramDevirt/import-indir.ll
llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll
Index: llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll
===================================================================
--- llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll
+++ llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll
@@ -25,7 +25,7 @@
%fptr = load i8*, i8** %fptrptr
%fptr_casted = bitcast i8* %fptr to i32 (i8*)*
%result = call i32 %fptr_casted(i8* %obj)
- ; CHECK-NOT: call
+ ; CHECK-NOT: call i32 %
; CHECK: ret i32 123
ret i32 %result
}
Index: llvm/test/Transforms/WholeProgramDevirt/import-indir.ll
===================================================================
--- llvm/test/Transforms/WholeProgramDevirt/import-indir.ll
+++ llvm/test/Transforms/WholeProgramDevirt/import-indir.ll
@@ -32,7 +32,7 @@
; SUMMARY-NEXT: TypeIdMap:
; SUMMARY-NEXT: typeid1:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
Index: llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll
===================================================================
--- llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll
+++ llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll
@@ -9,7 +9,7 @@
; SUMMARY: TypeIdMap:
; SUMMARY-NEXT: typeid3:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
@@ -29,7 +29,7 @@
; SUMMARY-ARM-NEXT: Bit: 1
; SUMMARY-NEXT: typeid4:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
Index: llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll
===================================================================
--- llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll
+++ llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll
@@ -6,7 +6,7 @@
; SUMMARY: TypeIdMap:
; SUMMARY-NEXT: typeid3:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
@@ -24,7 +24,7 @@
; SUMMARY-NEXT: Bit: 0
; SUMMARY-NEXT: typeid4:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
Index: llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll
===================================================================
--- llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll
+++ llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll
@@ -6,7 +6,7 @@
; SUMMARY: TypeIdMap:
; SUMMARY-NEXT: typeid4:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
Index: llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll
===================================================================
--- llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll
+++ llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll
@@ -4,7 +4,7 @@
; SUMMARY: TypeIdMap:
; SUMMARY-NEXT: typeid3:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
@@ -17,7 +17,7 @@
; SUMMARY-NEXT: ResByArg:
; SUMMARY-NEXT: typeid1:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
@@ -30,7 +30,7 @@
; SUMMARY-NEXT: ResByArg:
; SUMMARY-NEXT: typeid2:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
@@ -43,7 +43,7 @@
; SUMMARY-NEXT: ResByArg:
; SUMMARY-NEXT: typeid4:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
Index: llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll
===================================================================
--- llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll
+++ llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl2.ll
@@ -14,7 +14,7 @@
; RUN: -wholeprogramdevirt-summary-action=export -o /dev/null 2>&1 | FileCheck %s --check-prefix=MISSING-MODULE
; Check single impl devirtulation in summary
-; CHECK: typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: unsat, sizeM1BitWidth: 0), wpdResolutions: ((offset: 0, wpdRes: (kind: singleImpl, singleImplName: "_ZNK1A1fEv"))))) ; guid
+; CHECK: typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: unknown, sizeM1BitWidth: 0), wpdResolutions: ((offset: 0, wpdRes: (kind: singleImpl, singleImplName: "_ZNK1A1fEv"))))) ; guid
; MISSING-MODULE: combined summary should contain Regular LTO module
Index: llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll
===================================================================
--- llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll
+++ llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll
@@ -10,7 +10,7 @@
; SUMMARY: TypeIdMap:
; SUMMARY-NEXT: typeid3:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
@@ -23,7 +23,7 @@
; SUMMARY-NEXT: ResByArg:
; SUMMARY-NEXT: typeid1:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
@@ -36,7 +36,7 @@
; SUMMARY-NEXT: ResByArg:
; SUMMARY-NEXT: typeid2:
; SUMMARY-NEXT: TTRes:
-; SUMMARY-NEXT: Kind: Unsat
+; SUMMARY-NEXT: Kind: Unknown
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: AlignLog2: 0
; SUMMARY-NEXT: SizeM1: 0
Index: llvm/test/ThinLTO/X86/type_test_noindircall.ll
===================================================================
--- /dev/null
+++ llvm/test/ThinLTO/X86/type_test_noindircall.ll
@@ -0,0 +1,50 @@
+; Test to ensure that we correctly handle a type test not used for a virtual call.
+; If it isn't removed correctly by WPD then we could incorrectly get an Unsat
+; (resulting in an unreachable in the IR).
+
+; REQUIRES: x86-registered-target
+
+; RUN: opt -thinlto-bc -o %t.o %s
+
+; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
+; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \
+; RUN: -whole-program-visibility \
+; RUN: -verify-machineinstrs=0 \
+; RUN: -r=%t.o,_ZTVN12_GLOBAL__N_18RealFileE,px \
+; RUN: -o %t3
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%"class.llvm::vfs::File" = type { i32 (...)** }
+%"class.llvm::vfs::Status" = type <{ %"class.std::__cxx11::basic_string", %"class.llvm::sys::fs::UniqueID", %"struct.std::chrono::time_point", i32, i32, i64, i32, i32, i8, [7 x i8] }>
+%"class.std::__cxx11::basic_string" = type { %"struct.std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider", i64, %union.anon }
+%"struct.std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+%union.anon = type { i64, [8 x i8] }
+%"class.llvm::sys::fs::UniqueID" = type { i64, i64 }
+%"struct.std::chrono::time_point" = type { %"struct.std::chrono::duration" }
+%"struct.std::chrono::duration" = type { i64 }
+%"class.(anonymous namespace)::RealFile" = type { %"class.llvm::vfs::File", i32, [4 x i8], %"class.llvm::vfs::Status", %"class.std::__cxx11::basic_string" }
+
+@_ZTVN12_GLOBAL__N_18RealFileE = unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.(anonymous namespace)::RealFile"*)* @_ZN12_GLOBAL__N_18RealFileD2Ev to i8*)] }, align 8, !type !74
+
+define internal void @_ZN12_GLOBAL__N_18RealFileD2Ev(%"class.(anonymous namespace)::RealFile"* %this) unnamed_addr #0 align 2 {
+entry:
+; CHECK-IR: %0 = getelementptr
+ %0 = getelementptr %"class.(anonymous namespace)::RealFile", %"class.(anonymous namespace)::RealFile"* %this, i64 0, i32 0, i32 0
+; CHECK-IR-NEXT: store
+ store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN12_GLOBAL__N_18RealFileE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+ %1 = tail call i1 @llvm.type.test(i8* bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN12_GLOBAL__N_18RealFileE, i64 0, inrange i32 0, i64 2) to i8*), metadata !"4$09c6cc733fc6accb91e5d7b87cb48f2d")
+ tail call void @llvm.assume(i1 %1)
+; CHECK-IR-NEXT: ret void
+ ret void
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+; Make sure we don't inline or otherwise optimize out the direct calls.
+attributes #0 = { noinline optnone }
+
+!74 = !{i64 16, !"4$09c6cc733fc6accb91e5d7b87cb48f2d"}
Index: llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
===================================================================
--- llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
+++ llvm/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
@@ -33,6 +33,8 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @g, i8* null }]
+
%struct.D = type { i32 (...)** }
@_ZTV1D = internal constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3
@@ -57,6 +59,23 @@
; CHECK-IR-LABEL: ret i32
; CHECK-IR-LABEL: }
+; Function Attrs: inlinehint nounwind uwtable
+define internal void @_ZN1DC2Ev(%struct.D* %this) unnamed_addr align 2 {
+entry:
+ %this.addr = alloca %struct.D*, align 8
+ store %struct.D* %this, %struct.D** %this.addr, align 8
+ %this1 = load %struct.D*, %struct.D** %this.addr
+ %0 = bitcast %struct.D* %this1 to i32 (...)***
+ store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1D, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+ ret void
+}
+
+define internal void @g() section ".text.startup" {
+ %d = alloca %struct.D, align 8
+ call void @_ZN1DC2Ev(%struct.D* %d)
+ ret void
+}
+
declare i1 @llvm.type.test(i8*, metadata)
declare void @llvm.assume(i1)
Index: llvm/test/ThinLTO/X86/cfi-unsat.ll
===================================================================
--- /dev/null
+++ llvm/test/ThinLTO/X86/cfi-unsat.ll
@@ -0,0 +1,82 @@
+; REQUIRES: x86-registered-target
+
+; Test CFI devirtualization through the thin link and backend when
+; a type id is Unsat (not used on any global's type metadata).
+;
+; In this test case, the first module is split and will import a resolution
+; for its type test. The resolution would be exported by the second
+; module, which is set up so that it does not get split (treated as regular
+; LTO because it does not have any external globals from which to create
+; a unique module ID). We should not actually get any resolution for the
+; type id in this case, since no globals include it in their type metadata,
+; so the resolution is Unsat and the type.checked.load instructions are
+; converted to type tests that evaluate to false.
+
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1.o %p/Inputs/cfi-unsat.ll
+
+; RUN: llvm-lto2 run %t.o %t1.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN: -whole-program-visibility \
+; RUN: -o %t3 \
+; RUN: -r=%t.o,test2,px \
+; RUN: -r=%t1.o,_ZTV1B,px \
+; RUN: -r=%t1.o,test,px \
+; RUN: -r=%t1.o,testb,px
+; RUN: llvm-dis %t3.index.bc -o - | FileCheck %s --check-prefix=INDEX
+; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR0
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+
+; INDEX-NOT: "typeid:"
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+
+$test2 = comdat any
+
+define linkonce_odr i32 @test2(%struct.A* %obj, i32 %a) comdat {
+entry:
+ %0 = bitcast %struct.A* %obj to i8**
+ %vtable5 = load i8*, i8** %0
+
+ %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A")
+ %2 = extractvalue { i8*, i1 } %1, 1
+ br i1 %2, label %cont, label %trap
+
+trap:
+ tail call void @llvm.trap()
+ unreachable
+
+cont:
+ %3 = extractvalue { i8*, i1 } %1, 0
+ %4 = bitcast i8* %3 to i32 (%struct.A*, i32)*
+
+ %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a)
+
+ ret i32 %call
+}
+
+; CHECK-IR0: define weak_odr i32 @test
+; CHECK-IR0-NEXT: entry:
+; CHECK-IR0-NEXT: %0 = bitcast
+; CHECK-IR0-NEXT: %vtable5 =
+; CHECK-IR0-NEXT: tail call void @llvm.trap()
+; CHECK-IR0-NEXT: unreachable
+; CHECK-IR0-NEXT: }
+; CHECK-IR0: define weak_odr i32 @testb
+; CHECK-IR0-NEXT: entry:
+; CHECK-IR0-NEXT: %0 = bitcast
+; CHECK-IR0-NEXT: %vtable5 =
+; CHECK-IR0-NEXT: tail call void @llvm.trap()
+; CHECK-IR0-NEXT: unreachable
+; CHECK-IR0-NEXT: }
+
+; CHECK-IR1: define weak_odr i32 @test2
+; CHECK-IR1-NEXT: entry:
+; CHECK-IR1-NEXT: tail call void @llvm.trap()
+; CHECK-IR1-NEXT: unreachable
+; CHECK-IR1-NEXT: }
+
+declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata)
+declare void @llvm.trap()
Index: llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll
===================================================================
--- /dev/null
+++ llvm/test/ThinLTO/X86/Inputs/cfi-unsat.ll
@@ -0,0 +1,68 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { i32 (...)** }
+
+@_ZTV1B = linkonce_odr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1nEi to i8*)] }, !type !0
+
+$test = comdat any
+$testb = comdat any
+
+define linkonce_odr i32 @test(%struct.A* %obj, i32 %a) comdat {
+entry:
+ %0 = bitcast %struct.A* %obj to i8**
+ %vtable5 = load i8*, i8** %0
+
+ %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A")
+ %2 = extractvalue { i8*, i1 } %1, 1
+ br i1 %2, label %cont, label %trap
+
+trap:
+ tail call void @llvm.trap()
+ unreachable
+
+cont:
+ %3 = extractvalue { i8*, i1 } %1, 0
+ %4 = bitcast i8* %3 to i32 (%struct.A*, i32)*
+
+ %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a)
+
+ ret i32 %call
+}
+
+define linkonce_odr i32 @testb(%struct.A* %obj, i32 %a) comdat {
+entry:
+ %0 = bitcast %struct.A* %obj to i8**
+ %vtable5 = load i8*, i8** %0
+
+ %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 0, metadata !"_ZTS1A")
+ %2 = extractvalue { i8*, i1 } %1, 1
+ br i1 %2, label %cont, label %trap
+
+trap:
+ tail call void @llvm.trap()
+ unreachable
+
+cont:
+ %3 = extractvalue { i8*, i1 } %1, 0
+ %4 = bitcast i8* %3 to i32 (%struct.A*, i32)*
+
+ %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a)
+
+ ret i32 %call
+}
+
+declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata)
+declare void @llvm.trap()
+
+define internal i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) {
+entry:
+ ret i32 0
+}
+define internal i32 @_ZN1B1nEi(%struct.B* %this, i32 %a) {
+entry:
+ ret i32 0
+}
+
+!0 = !{i64 16, !"_ZTS1B"}
Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -60,6 +60,7 @@
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion
; CHECK-O-NEXT: Running pass: AttributorPass
+; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -49,6 +49,7 @@
; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass
; CHECK-O-NEXT: Finished {{.*}}Function pass manager run.
; CHECK-O-NEXT: Running pass: AttributorPass
+; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
Index: llvm/test/Other/new-pm-thinlto-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -80,6 +80,7 @@
; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass
; CHECK-O-NEXT: Finished llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: AttributorPass
+; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
Index: llvm/test/Other/new-pm-lto-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-lto-defaults.ll
+++ llvm/test/Other/new-pm-lto-defaults.ll
@@ -92,6 +92,7 @@
; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis
; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass
; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass
+; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}SimplifyCFGPass>
; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass
; CHECK-O2-NEXT: Running pass: GlobalDCEPass
Index: llvm/test/Bitcode/summary_version.ll
===================================================================
--- llvm/test/Bitcode/summary_version.ll
+++ llvm/test/Bitcode/summary_version.ll
@@ -2,7 +2,7 @@
; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK: <VERSION op0=8/>
+; CHECK: <VERSION op0=9/>
Index: llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
===================================================================
--- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -536,7 +536,9 @@
bool areRemarksEnabled();
- void scanTypeTestUsers(Function *TypeTestFunc);
+ void
+ scanTypeTestUsers(Function *TypeTestFunc,
+ DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
void buildTypeIdentifierMap(
@@ -1704,7 +1706,9 @@
return false;
}
-void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) {
+void DevirtModule::scanTypeTestUsers(
+ Function *TypeTestFunc,
+ DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
// Find all virtual calls via a virtual table pointer %p under an assumption
// of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
// points to a member of the type identifier %md. Group calls by (type ID,
@@ -1724,10 +1728,10 @@
auto &DT = LookupDomTree(*CI->getFunction());
findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
+ Metadata *TypeId =
+ cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
// If we found any, add them to CallSlots.
if (!Assumes.empty()) {
- Metadata *TypeId =
- cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
for (DevirtCallSite Call : DevirtCalls) {
// Only add this CallSite if we haven't seen it before. The vtable
@@ -1740,13 +1744,50 @@
}
}
- // We no longer need the assumes or the type test.
- for (auto Assume : Assumes)
- Assume->eraseFromParent();
- // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
- // may use the vtable argument later.
- if (CI->use_empty())
- CI->eraseFromParent();
+ auto RemoveTypeTestAssumes = [&]() {
+ // We no longer need the assumes or the type test.
+ for (auto Assume : Assumes)
+ Assume->eraseFromParent();
+ // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
+ // may use the vtable argument later.
+ if (CI->use_empty())
+ CI->eraseFromParent();
+ };
+
+ // At this point we could remove all type test assume sequences, as they
+ // were originally inserted for WPD. However, we can keep these in the
+ // code stream for later analysis (e.g. to help drive more efficient ICP
+ // sequences). They will eventually be removed by a second LowerTypeTests
+ // invocation that cleans them up. In order to do this correctly, the first
+ // LowerTypeTests invocation needs to know that they have "Unknown" type
+ // test resolution, so that they aren't treated as Unsat and lowered to
+ // False, which will break any uses on assumes. Below we remove any type
+ // test assumes that will not be treated as Unknown by LTT.
+
+ // The type test assumes will be treated by LTT as Unsat if the type id is
+ // not used on a global (in which case it has no entry in the TypeIdMap).
+ if (!TypeIdMap.count(TypeId))
+ RemoveTypeTestAssumes();
+
+ // For ThinLTO importing, we need to remove the type test assumes if this is
+ // an MDString type id without a corresponding TypeIdSummary. Any
+ // non-MDString type ids are ignored and treated as Unknown by LTT, so their
+ // type test assumes can be kept. If the MDString type id is missing a
+ // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
+ // exporting phase of WPD from analyzing it), then it would be treated as
+ // Unsat by LTT and we need to remove its type test assumes here. If not
+ // used on a vcall we don't need them for later optimization use in any
+ // case.
+ else if (ImportSummary && isa<MDString>(TypeId)) {
+ const TypeIdSummary *TidSummary =
+ ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
+ if (!TidSummary)
+ RemoveTypeTestAssumes();
+ else
+ // If one was created it should not be Unsat, because if we reached here
+ // the type id was used on a global.
+ assert(TidSummary->TTRes.TheKind != TypeTestResolution::Unsat);
+ }
}
}
@@ -1938,8 +1979,13 @@
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
return false;
+ // Rebuild type metadata into a map for easy lookup.
+ std::vector<VTableBits> Bits;
+ DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
+ buildTypeIdentifierMap(Bits, TypeIdMap);
+
if (TypeTestFunc && AssumeFunc)
- scanTypeTestUsers(TypeTestFunc);
+ scanTypeTestUsers(TypeTestFunc, TypeIdMap);
if (TypeCheckedLoadFunc)
scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
@@ -1961,10 +2007,6 @@
return true;
}
- // Rebuild type metadata into a map for easy lookup.
- std::vector<VTableBits> Bits;
- DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
- buildTypeIdentifierMap(Bits, TypeIdMap);
if (TypeIdMap.empty())
return true;
@@ -2021,14 +2063,22 @@
// function implementation at offset S.first.ByteOffset, and add to
// TargetsForSlot.
std::vector<VirtualCallTarget> TargetsForSlot;
- if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
+ WholeProgramDevirtResolution *Res = nullptr;
+ const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
+ if (ExportSummary && isa<MDString>(S.first.TypeID) &&
+ TypeMemberInfos.size())
+ // For any type id used on a global's type metadata, create the type id
+ // summary resolution regardless of whether we can devirtualize, so that
+ // lower type tests knows the type id is not Unsat. If it was not used on
+ // a global's type metadata, the TypeIdMap entry set will be empty, and
+ // we don't want to create an entry (with the default Unknown type
+ // resolution), which can prevent detection of the Unsat.
+ Res = &ExportSummary
+ ->getOrInsertTypeIdSummary(
+ cast<MDString>(S.first.TypeID)->getString())
+ .WPDRes[S.first.ByteOffset];
+ if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
S.first.ByteOffset)) {
- WholeProgramDevirtResolution *Res = nullptr;
- if (ExportSummary && isa<MDString>(S.first.TypeID))
- Res = &ExportSummary
- ->getOrInsertTypeIdSummary(
- cast<MDString>(S.first.TypeID)->getString())
- .WPDRes[S.first.ByteOffset];
if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
DidVirtualConstProp |=
@@ -2142,11 +2192,14 @@
std::vector<ValueInfo> TargetsForSlot;
auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
assert(TidSummary);
+ // Create the type id summary resolution regardless of whether we can
+ // devirtualize, so that lower type tests knows the type id is used on
+ // a global and not Unsat.
+ WholeProgramDevirtResolution *Res =
+ &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
+ .WPDRes[S.first.ByteOffset];
if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
S.first.ByteOffset)) {
- WholeProgramDevirtResolution *Res =
- &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
- .WPDRes[S.first.ByteOffset];
if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
DevirtTargets))
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -504,6 +504,7 @@
MPM.add(createBarrierNoopPass());
if (PerformThinLTO) {
+ MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
// Drop available_externally and unreferenced globals. This is necessary
// with ThinLTO in order to avoid leaving undefined references to dead
// globals in the object file.
@@ -537,9 +538,11 @@
// inter-module indirect calls. For that we perform indirect call promotion
// earlier in the pass pipeline, here before globalopt. Otherwise imported
// available_externally functions look unreferenced and are removed.
- if (PerformThinLTO)
+ if (PerformThinLTO) {
MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true,
!PGOSampleUse.empty()));
+ MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
+ }
// For SamplePGO in ThinLTO compile phase, we do not want to unroll loops
// as it will change the CFG too much to make the 2nd profile annotation
@@ -1059,8 +1062,8 @@
PM.add(createVerifierPass());
if (ImportSummary) {
- // These passes import type identifier resolutions for whole-program
- // devirtualization and CFI. They must run early because other passes may
+ // This pass imports type identifier resolutions for whole-program
+ // devirtualization and CFI. It must run early because other passes may
// disturb the specific instruction patterns that these passes look for,
// creating dependencies on resolutions that may not appear in the summary.
//
@@ -1108,6 +1111,9 @@
// control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
// link time if CFI is enabled. The pass does nothing if CFI is disabled.
PM.add(createLowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP (which is performed earlier than this in the regular LTO pipeline).
+ PM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);
Index: llvm/lib/Transforms/IPO/LowerTypeTests.cpp
===================================================================
--- llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -735,6 +735,9 @@
/// replace the call with.
Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
const TypeIdLowering &TIL) {
+ // Delay lowering if the resolution is currently unknown.
+ if (TIL.TheKind == TypeTestResolution::Unknown)
+ return nullptr;
if (TIL.TheKind == TypeTestResolution::Unsat)
return ConstantInt::getFalse(M.getContext());
@@ -1037,14 +1040,18 @@
report_fatal_error("Second argument of llvm.type.test must be metadata");
auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata());
+ // If this is a local unpromoted type, which doesn't have a metadata string,
+ // treat as Unknown and delay lowering, so that we can still utilize it for
+ // later optimizations.
if (!TypeIdStr)
- report_fatal_error(
- "Second argument of llvm.type.test must be a metadata string");
+ return;
TypeIdLowering TIL = importTypeId(TypeIdStr->getString());
Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL);
- CI->replaceAllUsesWith(Lowered);
- CI->eraseFromParent();
+ if (Lowered) {
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
}
// ThinLTO backend: the function F has a jump table entry; update this module
@@ -1167,8 +1174,10 @@
for (CallInst *CI : TIUI.CallSites) {
++NumTypeTestCallsLowered;
Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
- CI->replaceAllUsesWith(Lowered);
- CI->eraseFromParent();
+ if (Lowered) {
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
}
}
}
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -762,6 +762,12 @@
}
MPM.addPass(AttributorPass());
+ // Lower type metadata and the type.test intrinsic in the ThinLTO
+ // post link pipeline after ICP. This is to enable usage of the type
+ // tests in ICP sequences.
+ if (Phase == ThinLTOPhase::PostLink)
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
// Interprocedural constant propagation now that basic cleanup has occurred
// and prior to optimizing globals.
// FIXME: This position in the pipeline hasn't been carefully considered in
@@ -1207,6 +1213,9 @@
// metadata and intrinsics.
MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP.
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
return MPM;
}
@@ -1273,6 +1282,10 @@
// The LowerTypeTestsPass needs to run to lower type metadata and the
// type.test intrinsics. The pass does nothing if CFI is disabled.
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP (which is performed earlier than this in the regular LTO
+ // pipeline).
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
return MPM;
}
@@ -1400,6 +1413,9 @@
// to be run at link time if CFI is enabled. This pass does nothing if
// CFI is disabled.
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP (which is performed earlier than this in the regular LTO pipeline).
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
// Enable splitting late in the FullLTO post-link pipeline. This is done in
// the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
Index: llvm/lib/IR/AsmWriter.cpp
===================================================================
--- llvm/lib/IR/AsmWriter.cpp
+++ llvm/lib/IR/AsmWriter.cpp
@@ -2782,6 +2782,8 @@
static const char *getTTResKindName(TypeTestResolution::Kind K) {
switch (K) {
+ case TypeTestResolution::Unknown:
+ return "unknown";
case TypeTestResolution::Unsat:
return "unsat";
case TypeTestResolution::ByteArray:
Index: llvm/lib/AsmParser/LLParser.cpp
===================================================================
--- llvm/lib/AsmParser/LLParser.cpp
+++ llvm/lib/AsmParser/LLParser.cpp
@@ -7668,6 +7668,9 @@
return true;
switch (Lex.getKind()) {
+ case lltok::kw_unknown:
+ TTRes.TheKind = TypeTestResolution::Unknown;
+ break;
case lltok::kw_unsat:
TTRes.TheKind = TypeTestResolution::Unsat;
break;
Index: llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
===================================================================
--- llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -17,6 +17,7 @@
template <> struct ScalarEnumerationTraits<TypeTestResolution::Kind> {
static void enumeration(IO &io, TypeTestResolution::Kind &value) {
+ io.enumCase(value, "Unknown", TypeTestResolution::Unknown);
io.enumCase(value, "Unsat", TypeTestResolution::Unsat);
io.enumCase(value, "ByteArray", TypeTestResolution::ByteArray);
io.enumCase(value, "Inline", TypeTestResolution::Inline);
Index: llvm/include/llvm/IR/ModuleSummaryIndex.h
===================================================================
--- llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -833,7 +833,8 @@
Single, ///< Single element (last example in "Short Inline Bit Vectors")
AllOnes, ///< All-ones bit vector ("Eliminating Bit Vector Checks for
/// All-Ones Bit Vectors")
- } TheKind = Unsat;
+ Unknown, ///< Unknown (analysis not performed, don't lower)
+ } TheKind = Unknown;
/// Range of size-1 expressed as a bit width. For example, if the size is in
/// range [1,256], this number will be 8. This helps generate the most compact
@@ -1027,7 +1028,7 @@
// in the way some record are interpreted, like flags for instance.
// Note that incrementing this may require changes in both BitcodeReader.cpp
// and BitcodeWriter.cpp.
- static constexpr uint64_t BitcodeSummaryVersion = 8;
+ static constexpr uint64_t BitcodeSummaryVersion = 9;
// Regular LTO module name for ASM writer
static constexpr const char *getRegularLTOModuleName() {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits