tejohnson updated this revision to Diff 199320.
tejohnson added a comment.
Herald added subscribers: cfe-commits, hiraditya, eraman.
Herald added a project: clang.
Rework using module flags.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D60162/new/
https://reviews.llvm.org/D60162
Files:
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/CodeGen/nobuiltins.c
clang/test/CodeGen/svml-calls.ll
clang/test/CodeGen/thinlto_backend_nobuiltin.ll
clang/test/CodeGen/thinlto_backend_nobuiltin_memset.ll
clang/test/CodeGen/veclib-calls.ll
clang/test/CodeGen/veclib.c
llvm/lib/LTO/LTOBackend.cpp
Index: llvm/lib/LTO/LTOBackend.cpp
===================================================================
--- llvm/lib/LTO/LTOBackend.cpp
+++ llvm/lib/LTO/LTOBackend.cpp
@@ -218,6 +218,34 @@
// FIXME (davide): verify the output.
}
+/// Build the TargetLibraryInfoImpl used by the LTO backend for \p Mod.
+///
+/// Starts from the defaults for \p TM's target triple and then applies the
+/// module flags read below:
+///  - "DisableAllBuiltins": a non-zero value marks every library function
+///    unavailable.
+///  - "NoBuiltins": a list of function names; each one recognized by
+///    getLibFunc is marked unavailable.
+///  - "VectorLibrary": "Accelerate" or "SVML" registers that library's
+///    vectorizable functions.
+///
+/// \returns a heap-allocated object; the caller is responsible for it.
+static TargetLibraryInfoImpl *createTLII(Module &Mod, TargetMachine *TM) {
+  auto *TLII = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
+
+  // Only a non-zero "DisableAllBuiltins" turns everything off. A flag that is
+  // present but zero must not hide the per-function "NoBuiltins" list.
+  auto *DisableAll = mdconst::extract_or_null<ConstantInt>(
+      Mod.getModuleFlag("DisableAllBuiltins"));
+  if (DisableAll && DisableAll->getZExtValue()) {
+    TLII->disableAllFunctions();
+  } else if (Metadata *Val = Mod.getModuleFlag("NoBuiltins")) {
+    // Disable individual libc/libm calls in TargetLibraryInfo.
+    LibFunc F;
+    for (const MDOperand &FuncName : cast<MDNode>(Val)->operands())
+      if (TLII->getLibFunc(cast<MDString>(*FuncName).getString(), F))
+        TLII->setUnavailable(F);
+  }
+
+  if (MDString *VL =
+          dyn_cast_or_null<MDString>(Mod.getModuleFlag("VectorLibrary"))) {
+    if (VL->getString() == "Accelerate")
+      TLII->addVectorizableFunctionsFromVecLib(
+          TargetLibraryInfoImpl::Accelerate);
+    else if (VL->getString() == "SVML")
+      TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML);
+    else
+      // The flag value comes from the input module, so an unknown name is bad
+      // input rather than an impossible state: report it in all build modes.
+      report_fatal_error("Invalid vector library module flag");
+  }
+  return TLII;
+}
+
static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
std::string PipelineDesc,
std::string AAPipelineDesc,
@@ -239,6 +267,10 @@
// Register the AA manager first so that our version is the one used.
FAM.registerPass([&] { return std::move(AA); });
+ std::unique_ptr<TargetLibraryInfoImpl> TLII(createTLII(Mod, TM));
+ FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+ MAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
// Register all the basic analyses with the managers.
PB.registerModuleAnalyses(MAM);
PB.registerCGSCCAnalyses(CGAM);
@@ -268,7 +300,7 @@
passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
PassManagerBuilder PMB;
- PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
+ PMB.LibraryInfo = createTLII(Mod, TM);
PMB.Inliner = createFunctionInliningPass();
PMB.ExportSummary = ExportSummary;
PMB.ImportSummary = ImportSummary;
Index: clang/test/CodeGen/veclib.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/veclib.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -emit-llvm -fveclib=Accelerate %s -o - | FileCheck %s -check-prefix=ACCELERATE
+// RUN: %clang_cc1 -emit-llvm -fveclib=SVML %s -o - | FileCheck %s -check-prefix=SVML
+// RUN: %clang_cc1 -emit-llvm -fveclib=none %s -o - | FileCheck %s -check-prefix=NONE
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=NONE
+
+// ACCELERATE: !{{[0-9]+}} = !{i32 1, !"VectorLibrary", !"Accelerate"}
+// SVML: !{{[0-9]+}} = !{i32 1, !"VectorLibrary", !"SVML"}
+// NONE-NOT: VectorLibrary
+
+// Now ensure merging gets the expected behavior
+// RUN: %clang -c -flto %s -o %t0.o
+// RUN: %clang -c -flto -fveclib=Accelerate %s -o %t1.o
+// RUN: %clang -c -flto -fveclib=SVML %s -o %t2.o
+// Merge none with -fveclib=Accelerate -> VectorLibrary=Accelerate
+// RUN: llvm-lto %t0.o %t1.o -o %t3.o -save-merged-module
+// RUN: llvm-dis %t3.o.merged.bc -o - | FileCheck %s --check-prefix=ACCELERATE
+// Merge none with -fveclib=SVML -> VectorLibrary=SVML
+// RUN: llvm-lto %t0.o %t2.o -o %t3.o -save-merged-module
+// RUN: llvm-dis %t3.o.merged.bc -o - | FileCheck %s --check-prefix=SVML
+// Merge -fveclib=Accelerate with -fveclib=SVML -> Error
+// RUN: not llvm-lto %t1.o %t2.o -o %t3.o -save-merged-module
Index: clang/test/CodeGen/veclib-calls.ll
===================================================================
--- /dev/null
+++ clang/test/CodeGen/veclib-calls.ll
@@ -0,0 +1,38 @@
+; Test to ensure that the -fveclib=Accelerate module flag is handled properly in
+; the ThinLTO distributed backend.
+
+; RUN: opt -module-summary -o %t.o %s
+; RUN: llvm-lto -thinlto -o %t %t.o
+; RUN: %clang -target x86_64-unknown-linux-gnu -O3 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj
+; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK-LABEL: @sqrt_f32(
+;CHECK: vsqrtf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @sqrtf(float) nounwind readnone
+define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @sqrtf(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"VectorLibrary", !"Accelerate"}
Index: clang/test/CodeGen/thinlto_backend_nobuiltin_memset.ll
===================================================================
--- /dev/null
+++ clang/test/CodeGen/thinlto_backend_nobuiltin_memset.ll
@@ -0,0 +1,25 @@
+; Make sure that the -fno-builtin-memset module flag is handled properly in
+; the ThinLTO distributed backend.
+; REQUIRES: x86-registered-target
+
+; RUN: opt -module-summary -o %t.o %s
+; RUN: llvm-lto -thinlto -o %t %t.o
+
+; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj
+; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s --check-prefix=NOBUILTIN
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i8* @memset(i8*, i8, i64)
+
+define i8* @test(i8* %mem, i8 %val, i64 %size) {
+ ; NOBUILTIN: call i8* @memset
+ %ret = call i8* @memset(i8* %mem, i8 %val, i64 %size)
+ ret i8* %ret
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 6, !"NoBuiltins", !2}
+!2 = !{!"memset"}
+
Index: clang/test/CodeGen/thinlto_backend_nobuiltin.ll
===================================================================
--- /dev/null
+++ clang/test/CodeGen/thinlto_backend_nobuiltin.ll
@@ -0,0 +1,23 @@
+; Make sure that the -fno-builtin/-ffreestanding module flag is handled properly
+; in the ThinLTO distributed backend.
+; REQUIRES: x86-registered-target
+
+; RUN: opt -module-summary -o %t.o %s
+; RUN: llvm-lto -thinlto -o %t %t.o
+
+; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj
+; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s --check-prefix=NOBUILTIN
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i8* @memset(i8*, i8, i64)
+
+define i8* @test(i8* %mem, i8 %val, i64 %size) {
+ ; NOBUILTIN: call i8* @memset
+ %ret = call i8* @memset(i8* %mem, i8 %val, i64 %size)
+ ret i8* %ret
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 7, !"DisableAllBuiltins", i32 1}
Index: clang/test/CodeGen/svml-calls.ll
===================================================================
--- /dev/null
+++ clang/test/CodeGen/svml-calls.ll
@@ -0,0 +1,40 @@
+; Test to ensure that the -fveclib=SVML module flag is handled properly in
+; the ThinLTO distributed backend.
+
+; RUN: opt -module-summary -o %t.o %s
+; RUN: llvm-lto -thinlto -o %t %t.o
+; RUN: %clang -target x86_64-unknown-linux-gnu -O3 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj -mllvm -force-vector-width=4 -mllvm -force-vector-interleave=1 -mavx
+; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s
+
+; RUN opt -vector-library=SVML -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare double @sin(double) #0
+
+define void @sin_f64(double* nocapture %varray) {
+; CHECK-LABEL: @sin_f64(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @sin(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"VectorLibrary", !"SVML"}
Index: clang/test/CodeGen/nobuiltins.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/nobuiltins.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -emit-llvm -ffreestanding %s -o - | FileCheck %s -check-prefix=DISABLEALL
+// RUN: %clang_cc1 -emit-llvm -fno-builtin %s -o - | FileCheck %s -check-prefix=DISABLEALL
+// RUN: %clang_cc1 -emit-llvm -fno-builtin-memset -fno-builtin-memcpy %s -o - | FileCheck %s -check-prefix=DISABLESOME
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=NONE
+
+// ACCELERATE-DAG: !{{[0-9]+}} = !{i32 1, !"VectorLibrary", !"Accelerate"}
+// DISABLEALL-DAG: !{{[0-9]+}} = !{i32 7, !"DisableAllBuiltins", i32 1}
+// DISABLESOME-DAG: !{{[0-9]+}} = !{i32 6, !"NoBuiltins", ![[LIST:[0-9]+]]
+// DISABLESOME-DAG: ![[LIST]] = !{!"memset", !"memcpy"}
+// DISABLEMEMSET-DAG: !{{[0-9]+}} = !{i32 6, !"NoBuiltins", ![[LIST:[0-9]+]]
+// DISABLEMEMSET-DAG: ![[LIST]] = !{!"memset"}
+// NONE-NOT: Builtins
+
+// Now ensure merging gets the expected behavior
+// RUN: %clang -c -flto %s -o %t0.o
+// RUN: %clang -c -flto -fno-builtin %s -o %t1.o
+// RUN: %clang -c -flto -fno-builtin-memset %s -o %t2.o
+// RUN: %clang -c -flto -fno-builtin-memcpy %s -o %t3.o
+// Merge none with -fno-builtin -> DisableAllBuiltins set
+// RUN: llvm-lto %t0.o %t1.o -o %t4.o -save-merged-module
+// RUN: llvm-dis %t4.o.merged.bc -o - | FileCheck %s --check-prefix=DISABLEALL
+// Merge -fno-builtin with -fno-builtin-memset -> both DisableAllBuiltins and
+// NoBuiltins=memset are set.
+// RUN: llvm-lto %t1.o %t2.o -o %t4.o -save-merged-module
+// RUN: llvm-dis %t4.o.merged.bc -o - | FileCheck %s --check-prefix=DISABLEALL --check-prefix=DISABLEMEMSET
+// Merge -fno-builtin-memset with -fno-builtin-memcpy -> NoBuiltins=memset,memcpy.
+// RUN: llvm-lto %t2.o %t3.o -o %t4.o -save-merged-module
+// RUN: llvm-dis %t4.o.merged.bc -o - | FileCheck %s --check-prefix=DISABLESOME
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -527,6 +527,32 @@
1);
}
+ if (!CodeGenOpts.SimplifyLibCalls) {
+ // Indicate that all built-in functions are disabled.
+ getModule().addModuleFlag(llvm::Module::Max, "DisableAllBuiltins", 1);
+ }
+
+ if (!CodeGenOpts.getNoBuiltinFuncs().empty()) {
+ std::vector<llvm::Metadata *> Nodes;
+ for (auto &FuncName : CodeGenOpts.getNoBuiltinFuncs())
+ Nodes.push_back(llvm::MDString::get(VMContext, FuncName));
+ getModule().addModuleFlag(llvm::Module::AppendUnique, "NoBuiltins",
+ llvm::MDNode::get(VMContext, Nodes));
+ }
+
+ switch (CodeGenOpts.getVecLib()) {
+ case CodeGenOptions::Accelerate:
+ getModule().addModuleFlag(llvm::Module::Error, "VectorLibrary",
+ llvm::MDString::get(VMContext, "Accelerate"));
+ break;
+ case CodeGenOptions::SVML:
+ getModule().addModuleFlag(llvm::Module::Error, "VectorLibrary",
+ llvm::MDString::get(VMContext, "SVML"));
+ break;
+ default:
+ break;
+ }
+
if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) {
// Indicate whether __nvvm_reflect should be configured to flush denormal
// floating point values to 0. (This corresponds to its "__CUDA_FTZ"
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits