[PATCH] D60162: [ThinLTO] Support TargetLibraryInfoImpl in the backend

Teresa Johnson via Phabricator via cfe-commits Mon, 13 May 2019 13:24:29 -0700

tejohnson updated this revision to Diff 199320.
tejohnson added a comment.
Herald added subscribers: cfe-commits, hiraditya, eraman.
Herald added a project: clang.


Rework using module flags.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60162/new/

https://reviews.llvm.org/D60162

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGen/nobuiltins.c
  clang/test/CodeGen/svml-calls.ll
  clang/test/CodeGen/thinlto_backend_nobuiltin.ll
  clang/test/CodeGen/thinlto_backend_nobuiltin_memset.ll
  clang/test/CodeGen/veclib-calls.ll
  clang/test/CodeGen/veclib.c
  llvm/lib/LTO/LTOBackend.cpp

Index: llvm/lib/LTO/LTOBackend.cpp
===================================================================
--- llvm/lib/LTO/LTOBackend.cpp
+++ llvm/lib/LTO/LTOBackend.cpp
@@ -218,6 +218,34 @@
   // FIXME (davide): verify the output.
 }
 
+// Build TargetLibraryInfoImpl for TM's triple, honoring Mod's module flags.
+static TargetLibraryInfoImpl *createTLII(Module &Mod, TargetMachine *TM) {
+  auto *TLII = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
+  auto *DisableAll = mdconst::extract_or_null<ConstantInt>(
+      Mod.getModuleFlag("DisableAllBuiltins"));
+  if (DisableAll && DisableAll->getZExtValue()) {
+    TLII->disableAllFunctions();
+  } else if (Metadata *Val = Mod.getModuleFlag("NoBuiltins")) {
+    // Also taken when "DisableAllBuiltins" is present but zero.
+    LibFunc F;
+    for (const MDOperand &FuncName : cast<MDNode>(Val)->operands())
+      if (TLII->getLibFunc(cast<MDString>(*FuncName).getString(), F))
+        TLII->setUnavailable(F); // Names getLibFunc rejects are skipped.
+  }
+
+  if (MDString *VL =
+          dyn_cast_or_null<MDString>(Mod.getModuleFlag("VectorLibrary"))) {
+    if (VL->getString() == "Accelerate")
+      TLII->addVectorizableFunctionsFromVecLib(
+          TargetLibraryInfoImpl::Accelerate);
+    else if (VL->getString() == "SVML")
+      TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML);
+    else // The flag is read from input IR, so fail in release builds too.
+      report_fatal_error("Invalid vector library module flag");
+  }
+  return TLII; // Allocated with new; the caller takes ownership.
+}
+
 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
                                  std::string PipelineDesc,
                                  std::string AAPipelineDesc,
@@ -239,6 +267,10 @@
   // Register the AA manager first so that our version is the one used.
   FAM.registerPass([&] { return std::move(AA); });
 
+  std::unique_ptr<TargetLibraryInfoImpl> TLII(createTLII(Mod, TM));
+  FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+  MAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
   // Register all the basic analyses with the managers.
   PB.registerModuleAnalyses(MAM);
   PB.registerCGSCCAnalyses(CGAM);
@@ -268,7 +300,7 @@
   passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
 
   PassManagerBuilder PMB;
-  PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
+  PMB.LibraryInfo = createTLII(Mod, TM);
   PMB.Inliner = createFunctionInliningPass();
   PMB.ExportSummary = ExportSummary;
   PMB.ImportSummary = ImportSummary;
Index: clang/test/CodeGen/veclib.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/veclib.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -emit-llvm -fveclib=Accelerate %s -o - | FileCheck %s -check-prefix=ACCELERATE
+// RUN: %clang_cc1 -emit-llvm -fveclib=SVML %s -o - | FileCheck %s -check-prefix=SVML
+// RUN: %clang_cc1 -emit-llvm -fveclib=none %s -o - | FileCheck %s -check-prefix=NONE
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=NONE
+
+// ACCELERATE: !{{[0-9]+}} = !{i32 1, !"VectorLibrary", !"Accelerate"}
+// SVML: !{{[0-9]+}} = !{i32 1, !"VectorLibrary", !"SVML"}
+// NONE-NOT: VectorLibrary
+
+// Now ensure merging gets the expected behavior
+// RUN: %clang -c -flto %s -o %t0.o
+// RUN: %clang -c -flto -fveclib=Accelerate %s -o %t1.o
+// RUN: %clang -c -flto -fveclib=SVML %s -o %t2.o
+// Merge none with -fveclib=Accelerate -> VectorLibrary=Accelerate
+// RUN: llvm-lto %t0.o %t1.o -o %t3.o -save-merged-module
+// RUN: llvm-dis %t3.o.merged.bc -o - | FileCheck %s --check-prefix=ACCELERATE
+// Merge none with -fveclib=SVML -> VectorLibrary=SVML
+// RUN: llvm-lto %t0.o %t2.o -o %t3.o -save-merged-module
+// RUN: llvm-dis %t3.o.merged.bc -o - | FileCheck %s --check-prefix=SVML
+// Merge -fveclib=Accelerate with -fveclib=SVML -> Error
+// RUN: not llvm-lto %t1.o %t2.o -o %t3.o -save-merged-module
Index: clang/test/CodeGen/veclib-calls.ll
===================================================================
--- /dev/null
+++ clang/test/CodeGen/veclib-calls.ll
@@ -0,0 +1,38 @@
+; Test to ensure that -fveclib=Accelerate module flag is handled properly in
+; the ThinLTO distributed backend.
+
+; RUN: opt -module-summary -o %t.o %s
+; RUN: llvm-lto -thinlto -o %t %t.o
+; RUN: %clang -target x86_64-unknown-linux-gnu -O3 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj
+; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK-LABEL: @sqrt_f32(
+;CHECK: vsqrtf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @sqrtf(float) nounwind readnone
+define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { ; x[i] = sqrtf(y[i]) for i in [0, n)
+entry:
+  %cmp6 = icmp sgt i32 %n, 0 ; skip the loop when n <= 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %call = tail call float @sqrtf(float %0) nounwind readnone ; scalar call the test expects to become a <4 x float> vsqrtf call
+  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave once the truncated next index equals n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"VectorLibrary", !"Accelerate"}
Index: clang/test/CodeGen/thinlto_backend_nobuiltin_memset.ll
===================================================================
--- /dev/null
+++ clang/test/CodeGen/thinlto_backend_nobuiltin_memset.ll
@@ -0,0 +1,25 @@
+; Make sure that -fno-builtin-memset module flag is handled properly in
+; the ThinLTO distributed backend.
+; REQUIRES: x86-registered-target
+
+; RUN: opt -module-summary -o %t.o %s
+; RUN: llvm-lto -thinlto -o %t %t.o
+
+; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj
+; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s --check-prefix=NOBUILTIN
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i8* @memset(i8*, i8, i64)
+
+define i8* @test(i8* %mem, i8 %val, i64 %size) { ; forwards its arguments to @memset and returns the result
+  ; NOBUILTIN: call i8* @memset
+  %ret = call i8* @memset(i8* %mem, i8 %val, i64 %size) ; the check above requires this to stay a direct call to @memset
+  ret i8* %ret
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 6, !"NoBuiltins", !2}
+!2 = !{!"memset"}
+
Index: clang/test/CodeGen/thinlto_backend_nobuiltin.ll
===================================================================
--- /dev/null
+++ clang/test/CodeGen/thinlto_backend_nobuiltin.ll
@@ -0,0 +1,23 @@
+; Make sure that -fno-builtin/-ffreestanding module flag is handled properly in
+; the ThinLTO distributed backend.
+; REQUIRES: x86-registered-target
+
+; RUN: opt -module-summary -o %t.o %s
+; RUN: llvm-lto -thinlto -o %t %t.o
+
+; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj
+; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s --check-prefix=NOBUILTIN
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i8* @memset(i8*, i8, i64)
+
+define i8* @test(i8* %mem, i8 %val, i64 %size) { ; forwards its arguments to @memset and returns the result
+  ; NOBUILTIN: call i8* @memset
+  %ret = call i8* @memset(i8* %mem, i8 %val, i64 %size) ; the check above requires this to stay a direct call to @memset
+  ret i8* %ret
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 7, !"DisableAllBuiltins", i32 1}
Index: clang/test/CodeGen/svml-calls.ll
===================================================================
--- /dev/null
+++ clang/test/CodeGen/svml-calls.ll
@@ -0,0 +1,40 @@
+; Test to ensure that -fveclib=SVML module flag is handled properly in
+; the ThinLTO distributed backend.
+
+; RUN: opt -module-summary -o %t.o %s
+; RUN: llvm-lto -thinlto -o %t %t.o
+; RUN: %clang -target x86_64-unknown-linux-gnu -O3 -o %t2.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj -mllvm -force-vector-width=4 -mllvm -force-vector-interleave=1 -mavx
+; RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck %s
+
+; RUN opt -vector-library=SVML -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare double @sin(double) #0
+
+define void @sin_f64(double* nocapture %varray) { ; varray[i] = sin((double)i) for i = 0..999
+; CHECK-LABEL: @sin_f64(
+; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]])
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @sin(double %conv) ; scalar call the test expects to become a <4 x double> @__svml_sin4 call
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000 ; fixed trip count of 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"VectorLibrary", !"SVML"}
Index: clang/test/CodeGen/nobuiltins.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/nobuiltins.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -emit-llvm -ffreestanding %s -o - | FileCheck %s -check-prefix=DISABLEALL
+// RUN: %clang_cc1 -emit-llvm -fno-builtin %s -o - | FileCheck %s -check-prefix=DISABLEALL
+// RUN: %clang_cc1 -emit-llvm -fno-builtin-memset -fno-builtin-memcpy %s -o - | FileCheck %s -check-prefix=DISABLESOME
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=NONE
+
+// ACCELERATE-DAG: !{{[0-9]+}} = !{i32 1, !"VectorLibrary", !"Accelerate"}
+// DISABLEALL-DAG: !{{[0-9]+}} = !{i32 7, !"DisableAllBuiltins", i32 1}
+// DISABLESOME-DAG: !{{[0-9]+}} = !{i32 6, !"NoBuiltins", ![[LIST:[0-9]+]]
+// DISABLESOME-DAG: ![[LIST]] = !{!"memset", !"memcpy"}
+// DISABLEMEMSET-DAG: !{{[0-9]+}} = !{i32 6, !"NoBuiltins", ![[LIST:[0-9]+]]
+// DISABLEMEMSET-DAG: ![[LIST]] = !{!"memset"}
+// NONE-NOT: Builtins
+
+// Now ensure merging gets the expected behavior
+// RUN: %clang -c -flto %s -o %t0.o
+// RUN: %clang -c -flto -fno-builtin %s -o %t1.o
+// RUN: %clang -c -flto -fno-builtin-memset %s -o %t2.o
+// RUN: %clang -c -flto -fno-builtin-memcpy %s -o %t3.o
+// Merge none with -fno-builtin -> DisableAllBuiltins set
+// RUN: llvm-lto %t0.o %t1.o -o %t4.o -save-merged-module
+// RUN: llvm-dis %t4.o.merged.bc -o - | FileCheck %s --check-prefix=DISABLEALL
+// Merge -fno-builtin -fno-builtin-memset -> Both DisableAllBuiltins set and
+// NoBuiltins=memset.
+// RUN: llvm-lto %t1.o %t2.o -o %t4.o -save-merged-module
+// RUN: llvm-dis %t4.o.merged.bc -o - | FileCheck %s --check-prefix=DISABLEALL --check-prefix=DISABLEMEMSET
+// Merge -fno-builtin-memset -fno-builtin-memcpy -> NoBuiltins=memset,memcpy.
+// RUN: llvm-lto %t2.o %t3.o -o %t4.o -save-merged-module
+// RUN: llvm-dis %t4.o.merged.bc -o - | FileCheck %s --check-prefix=DISABLESOME
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -527,6 +527,32 @@
                               1);
   }
 
+  if (!CodeGenOpts.SimplifyLibCalls) {
+    // Indicate that all built in functions are disabled.
+    getModule().addModuleFlag(llvm::Module::Max, "DisableAllBuiltins", 1);
+  }
+
+  if (!CodeGenOpts.getNoBuiltinFuncs().empty()) {
+    std::vector<llvm::Metadata *> Nodes;
+    for (auto &FuncName : CodeGenOpts.getNoBuiltinFuncs())
+      Nodes.push_back(llvm::MDString::get(VMContext, FuncName));
+    getModule().addModuleFlag(llvm::Module::AppendUnique, "NoBuiltins",
+                              llvm::MDNode::get(VMContext, Nodes));
+  }
+
+  switch (CodeGenOpts.getVecLib()) {
+  case CodeGenOptions::Accelerate:
+    getModule().addModuleFlag(llvm::Module::Error, "VectorLibrary",
+                              llvm::MDString::get(VMContext, "Accelerate"));
+    break;
+  case CodeGenOptions::SVML:
+    getModule().addModuleFlag(llvm::Module::Error, "VectorLibrary",
+                              llvm::MDString::get(VMContext, "SVML"));
+    break;
+  default:
+    break;
+  }
+
   if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) {
     // Indicate whether __nvvm_reflect should be configured to flush denormal
     // floating point values to 0.  (This corresponds to its "__CUDA_FTZ"

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D60162: [ThinLTO] Support TargetLibraryInfoImpl in the backend

Reply via email to