https://github.com/AlexisPerry updated https://github.com/llvm/llvm-project/pull/98517
>From 2e26f0f66f070cd0b684531efc63e63e2e584dfa Mon Sep 17 00:00:00 2001 From: Alexis Perry-Holby <ape...@lanl.gov> Date: Thu, 11 Jul 2024 12:51:39 -0600 Subject: [PATCH 1/2] Add basic -mtune support Initial implementation for the -mtune flag in Flang. --- clang/include/clang/Driver/Options.td | 7 +++--- clang/lib/Driver/ToolChains/Flang.cpp | 10 +++++++- flang/include/flang/Frontend/TargetOptions.h | 3 +++ flang/include/flang/Lower/Bridge.h | 6 ++--- .../flang/Optimizer/CodeGen/CGPasses.td | 4 +++ .../include/flang/Optimizer/CodeGen/Target.h | 21 ++++++++++++++-- .../Optimizer/Dialect/Support/FIRContext.h | 7 ++++++ .../flang/Optimizer/Transforms/Passes.td | 5 +++- flang/lib/Frontend/CompilerInvocation.cpp | 4 +++ flang/lib/Frontend/FrontendActions.cpp | 3 ++- flang/lib/Lower/Bridge.cpp | 3 ++- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 6 ++++- flang/lib/Optimizer/CodeGen/Target.cpp | 11 ++++++++ flang/lib/Optimizer/CodeGen/TargetRewrite.cpp | 12 ++++++++- flang/lib/Optimizer/CodeGen/TypeConverter.cpp | 3 ++- .../Optimizer/Dialect/Support/FIRContext.cpp | 18 +++++++++++++ flang/test/Driver/tune-cpu-fir.f90 | 25 +++++++++++++++++++ flang/test/Lower/tune-cpu-llvm.f90 | 8 ++++++ flang/tools/bbc/bbc.cpp | 3 ++- flang/tools/tco/tco.cpp | 4 +++ flang/unittests/Optimizer/FIRContextTest.cpp | 5 +++- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 1 + mlir/lib/Target/LLVMIR/ModuleImport.cpp | 5 ++++ mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 3 +++ mlir/test/Target/LLVMIR/Import/tune-cpu.ll | 16 ++++++++++++ mlir/test/Target/LLVMIR/tune-cpu.mlir | 14 +++++++++++ 26 files changed, 190 insertions(+), 17 deletions(-) create mode 100644 flang/test/Driver/tune-cpu-fir.f90 create mode 100644 flang/test/Lower/tune-cpu-llvm.f90 create mode 100644 mlir/test/Target/LLVMIR/Import/tune-cpu.ll create mode 100644 mlir/test/Target/LLVMIR/tune-cpu.mlir diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index cfb37b3c5b474..8d49a4708aaf0 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5436,6 +5436,7 @@ def module_file_info : Flag<["-"], "module-file-info">, Flags<[]>, HelpText<"Provide information about a particular module file">; def mthumb : Flag<["-"], "mthumb">, Group<m_Group>; def mtune_EQ : Joined<["-"], "mtune=">, Group<m_Group>, + Visibility<[ClangOption, FlangOption]>, HelpText<"Only supported on AArch64, PowerPC, RISC-V, SPARC, SystemZ, and X86">; def multi__module : Flag<["-"], "multi_module">; def multiply__defined__unused : Separate<["-"], "multiply_defined_unused">; @@ -6760,9 +6761,6 @@ def emit_hlfir : Flag<["-"], "emit-hlfir">, Group<Action_Group>, let Visibility = [CC1Option, CC1AsOption] in { -def tune_cpu : Separate<["-"], "tune-cpu">, - HelpText<"Tune for a specific cpu type">, - MarshallingInfoString<TargetOpts<"TuneCPU">>; def target_abi : Separate<["-"], "target-abi">, HelpText<"Target a particular ABI type">, MarshallingInfoString<TargetOpts<"ABI">>; @@ -6789,6 +6787,9 @@ def darwin_target_variant_triple : Separate<["-"], "darwin-target-variant-triple let Visibility = [CC1Option, CC1AsOption, FC1Option] in { +def tune_cpu : Separate<["-"], "tune-cpu">, + HelpText<"Tune for a specific cpu type">, + MarshallingInfoString<TargetOpts<"TuneCPU">>; def target_cpu : Separate<["-"], "target-cpu">, HelpText<"Target a specific cpu type">, MarshallingInfoString<TargetOpts<"CPU">>; diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index ee8292a508f93..7e42bad258cc6 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -15,6 +15,7 @@ #include "llvm/Frontend/Debug/Options.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +#include "llvm/TargetParser/Host.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include "llvm/TargetParser/RISCVTargetParser.h" @@ -411,6 +412,13 @@ void Flang::addTargetOptions(const ArgList &Args, } // TODO: Add target specific flags, ABI, mtune option etc. + if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { + CmdArgs.push_back("-tune-cpu"); + if (A->getValue() == StringRef{"native"}) + CmdArgs.push_back(Args.MakeArgString(llvm::sys::getHostCPUName())); + else + CmdArgs.push_back(A->getValue()); + } } void Flang::addOffloadOptions(Compilation &C, const InputInfoList &Inputs, @@ -807,7 +815,7 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, case CodeGenOptions::FramePointerKind::None: FPKeepKindStr = "-mframe-pointer=none"; break; - case CodeGenOptions::FramePointerKind::Reserved: + case CodeGenOptions::FramePointerKind::Reserved: FPKeepKindStr = "-mframe-pointer=reserved"; break; case CodeGenOptions::FramePointerKind::NonLeaf: diff --git a/flang/include/flang/Frontend/TargetOptions.h b/flang/include/flang/Frontend/TargetOptions.h index ef5d270a2185d..fa72c77a028a1 100644 --- a/flang/include/flang/Frontend/TargetOptions.h +++ b/flang/include/flang/Frontend/TargetOptions.h @@ -32,6 +32,9 @@ class TargetOptions { /// If given, the name of the target CPU to generate code for. std::string cpu; + /// If given, the name of the target CPU to tune code for. + std::string cpuToTuneFor; + /// The list of target specific features to enable or disable, as written on /// the command line. std::vector<std::string> featuresAsWritten; diff --git a/flang/include/flang/Lower/Bridge.h b/flang/include/flang/Lower/Bridge.h index 52110b861b680..4379ed512cdf0 100644 --- a/flang/include/flang/Lower/Bridge.h +++ b/flang/include/flang/Lower/Bridge.h @@ -65,11 +65,11 @@ class LoweringBridge { const Fortran::lower::LoweringOptions &loweringOptions, const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults, const Fortran::common::LanguageFeatureControl &languageFeatures, - const llvm::TargetMachine &targetMachine) { + const llvm::TargetMachine &targetMachine, llvm::StringRef tuneCPU) { return LoweringBridge(ctx, semanticsContext, defaultKinds, intrinsics, targetCharacteristics, allCooked, triple, kindMap, loweringOptions, envDefaults, languageFeatures, - targetMachine); + targetMachine, tuneCPU); } //===--------------------------------------------------------------------===// @@ -148,7 +148,7 @@ class LoweringBridge { const Fortran::lower::LoweringOptions &loweringOptions, const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults, const Fortran::common::LanguageFeatureControl &languageFeatures, - const llvm::TargetMachine &targetMachine); + const llvm::TargetMachine &targetMachine, const llvm::StringRef tuneCPU); LoweringBridge() = delete; LoweringBridge(const LoweringBridge &) = delete; diff --git a/flang/include/flang/Optimizer/CodeGen/CGPasses.td b/flang/include/flang/Optimizer/CodeGen/CGPasses.td index 9a4d327b33bad..989e3943882a1 100644 --- a/flang/include/flang/Optimizer/CodeGen/CGPasses.td +++ b/flang/include/flang/Optimizer/CodeGen/CGPasses.td @@ -31,6 +31,8 @@ def FIRToLLVMLowering : Pass<"fir-to-llvm-ir", "mlir::ModuleOp"> { "Override module's data layout.">, Option<"forcedTargetCPU", "target-cpu", "std::string", /*default=*/"", "Override module's target CPU.">, + Option<"forcedTuneCPU", "tune-cpu", "std::string", /*default=*/"", + "Override module's tune CPU.">, Option<"forcedTargetFeatures", "target-features", "std::string", /*default=*/"", "Override module's target features.">, Option<"applyTBAA", "apply-tbaa", "bool", /*default=*/"false", @@ -68,6 +70,8 @@ def TargetRewritePass : Pass<"target-rewrite", "mlir::ModuleOp"> { "Override module's target triple.">, Option<"forcedTargetCPU", "target-cpu", "std::string", /*default=*/"", "Override module's target CPU.">, + Option<"forcedTuneCPU", "tune-cpu", "std::string", /*default=*/"", + "Override module's tune CPU.">, Option<"forcedTargetFeatures", "target-features", "std::string", /*default=*/"", "Override module's target features.">, Option<"noCharacterConversion", "no-character-conversion", diff --git a/flang/include/flang/Optimizer/CodeGen/Target.h b/flang/include/flang/Optimizer/CodeGen/Target.h index 3cf6a74a9adb7..2b3b2152ac80c 100644 --- a/flang/include/flang/Optimizer/CodeGen/Target.h +++ b/flang/include/flang/Optimizer/CodeGen/Target.h @@ -76,6 +76,11 @@ class CodeGenSpecifics { llvm::StringRef targetCPU, mlir::LLVM::TargetFeaturesAttr targetFeatures, const mlir::DataLayout &dl); + static std::unique_ptr<CodeGenSpecifics> + get(mlir::MLIRContext *ctx, llvm::Triple &&trp, KindMapping &&kindMap, + llvm::StringRef targetCPU, mlir::LLVM::TargetFeaturesAttr targetFeatures, + const mlir::DataLayout &dl, llvm::StringRef tuneCPU); + static TypeAndAttr getTypeAndAttr(mlir::Type t) { return TypeAndAttr{t, {}}; } CodeGenSpecifics(mlir::MLIRContext *ctx, llvm::Triple &&trp, @@ -83,7 +88,17 @@ class CodeGenSpecifics { mlir::LLVM::TargetFeaturesAttr targetFeatures, const mlir::DataLayout &dl) : context{*ctx}, triple{std::move(trp)}, kindMap{std::move(kindMap)}, - targetCPU{targetCPU}, targetFeatures{targetFeatures}, dataLayout{&dl} {} + targetCPU{targetCPU}, targetFeatures{targetFeatures}, dataLayout{&dl}, + tuneCPU{""} {} + + CodeGenSpecifics(mlir::MLIRContext *ctx, llvm::Triple &&trp, + KindMapping &&kindMap, llvm::StringRef targetCPU, + mlir::LLVM::TargetFeaturesAttr targetFeatures, + const mlir::DataLayout &dl, llvm::StringRef tuneCPU) + : context{*ctx}, triple{std::move(trp)}, kindMap{std::move(kindMap)}, + targetCPU{targetCPU}, targetFeatures{targetFeatures}, dataLayout{&dl}, + tuneCPU{tuneCPU} {} + CodeGenSpecifics() = delete; virtual ~CodeGenSpecifics() {} @@ -165,7 +180,8 @@ class CodeGenSpecifics { virtual unsigned char getCIntTypeWidth() const = 0; llvm::StringRef getTargetCPU() const { return targetCPU; } - + llvm::StringRef getTuneCPU() const { return tuneCPU; } + mlir::LLVM::TargetFeaturesAttr getTargetFeatures() const { return targetFeatures; } @@ -182,6 +198,7 @@ class CodeGenSpecifics { llvm::StringRef targetCPU; mlir::LLVM::TargetFeaturesAttr targetFeatures; const mlir::DataLayout *dataLayout = nullptr; + llvm::StringRef tuneCPU; }; } // namespace fir diff --git a/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h b/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h index 059a10ce2fe51..b69f1415040ec 100644 --- a/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h +++ b/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h @@ -58,6 +58,13 @@ void setTargetCPU(mlir::ModuleOp mod, llvm::StringRef cpu); /// Get the target CPU string from the Module or return a null reference. llvm::StringRef getTargetCPU(mlir::ModuleOp mod); +/// Set the tune CPU for the module. `cpu` must not be deallocated while +/// module `mod` is still live. +void setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu); + +/// Get the tune CPU string from the Module or return a null reference. +llvm::StringRef getTuneCPU(mlir::ModuleOp mod); + /// Set the target features for the module. void setTargetFeatures(mlir::ModuleOp mod, llvm::StringRef features); diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index b3ed9acad36df..786083f95e15c 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -411,7 +411,10 @@ def FunctionAttr : Pass<"function-attr", "mlir::func::FuncOp"> { Option<"unsafeFPMath", "unsafe-fp-math", "bool", /*default=*/"false", "Set the unsafe-fp-math attribute on functions in the module.">, - ]; + Option<"tuneCPU", "tune-cpu", + "llvm::StringRef", /*default=*/"llvm::StringRef{}", + "Set the tune-cpu attribute on functions in the module.">, +]; } def AssumedRankOpConversion : Pass<"fir-assumed-rank-op", "mlir::ModuleOp"> { diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index e2d60ad46f14f..3d66a946fc946 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -431,6 +431,10 @@ static void parseTargetArgs(TargetOptions &opts, llvm::opt::ArgList &args) { args.getLastArg(clang::driver::options::OPT_target_cpu)) opts.cpu = a->getValue(); + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_tune_cpu)) + opts.cpuToTuneFor = a->getValue(); + for (const llvm::opt::Arg *currentArg : args.filtered(clang::driver::options::OPT_target_feature)) opts.featuresAsWritten.emplace_back(currentArg->getValue()); diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index a85ecd1ac71b3..5c86bd947ce73 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -297,7 +297,8 @@ bool CodeGenAction::beginSourceFileAction() { ci.getParsing().allCooked(), ci.getInvocation().getTargetOpts().triple, kindMap, ci.getInvocation().getLoweringOpts(), ci.getInvocation().getFrontendOpts().envDefaults, - ci.getInvocation().getFrontendOpts().features, targetMachine); + ci.getInvocation().getFrontendOpts().features, targetMachine, + ci.getInvocation().getTargetOpts().cpuToTuneFor); // Fetch module from lb, so we can set mlirModule = std::make_unique<mlir::ModuleOp>(lb.getModule()); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 3d071f6bb8d5a..b998709dccd8c 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -6020,7 +6020,7 @@ Fortran::lower::LoweringBridge::LoweringBridge( const Fortran::lower::LoweringOptions &loweringOptions, const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults, const Fortran::common::LanguageFeatureControl &languageFeatures, - const llvm::TargetMachine &targetMachine) + const llvm::TargetMachine &targetMachine, const llvm::StringRef tuneCPU) : semanticsContext{semanticsContext}, defaultKinds{defaultKinds}, intrinsics{intrinsics}, targetCharacteristics{targetCharacteristics}, cooked{&cooked}, context{context}, kindMap{kindMap}, @@ -6077,6 +6077,7 @@ Fortran::lower::LoweringBridge::LoweringBridge( fir::setTargetTriple(*module.get(), triple); fir::setKindMapping(*module.get(), kindMap); fir::setTargetCPU(*module.get(), targetMachine.getTargetCPU()); + fir::setTuneCPU(*module.get(), tuneCPU); fir::setTargetFeatures(*module.get(), targetMachine.getTargetFeatureString()); fir::support::setMLIRDataLayout(*module.get(), targetMachine.createDataLayout()); diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 7483acfcd1ca7..e370a33b7c4a7 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -3618,6 +3618,9 @@ class FIRToLLVMLowering if (!forcedTargetCPU.empty()) fir::setTargetCPU(mod, forcedTargetCPU); + if (!forcedTuneCPU.empty()) + fir::setTuneCPU(mod, forcedTuneCPU); + if (!forcedTargetFeatures.empty()) fir::setTargetFeatures(mod, forcedTargetFeatures); @@ -3714,7 +3717,8 @@ class FIRToLLVMLowering signalPassFailure(); } - // Run pass to add comdats to functions that have weak linkage on relevant platforms + // Run pass to add comdats to functions that have weak linkage on relevant + // platforms if (fir::getTargetTriple(mod).supportsCOMDAT()) { mlir::OpPassManager comdatPM("builtin.module"); comdatPM.addPass(mlir::LLVM::createLLVMAddComdats()); diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp index 652e2bddc1b89..25141102a8c43 100644 --- a/flang/lib/Optimizer/CodeGen/Target.cpp +++ b/flang/lib/Optimizer/CodeGen/Target.cpp @@ -1113,3 +1113,14 @@ fir::CodeGenSpecifics::get(mlir::MLIRContext *ctx, llvm::Triple &&trp, } TODO(mlir::UnknownLoc::get(ctx), "target not implemented"); } + +std::unique_ptr<fir::CodeGenSpecifics> fir::CodeGenSpecifics::get( + mlir::MLIRContext *ctx, llvm::Triple &&trp, KindMapping &&kindMap, + llvm::StringRef targetCPU, mlir::LLVM::TargetFeaturesAttr targetFeatures, + const mlir::DataLayout &dl, llvm::StringRef tuneCPU) { + std::unique_ptr<fir::CodeGenSpecifics> CGS = fir::CodeGenSpecifics::get( + ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl); + + CGS->tuneCPU = tuneCPU; + return CGS; +} diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp index 561d700f41220..b52f2b9325ece 100644 --- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp @@ -89,6 +89,9 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> { if (!forcedTargetCPU.empty()) fir::setTargetCPU(mod, forcedTargetCPU); + if (!forcedTuneCPU.empty()) + fir::setTuneCPU(mod, forcedTuneCPU); + if (!forcedTargetFeatures.empty()) fir::setTargetFeatures(mod, forcedTargetFeatures); @@ -106,7 +109,8 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> { auto specifics = fir::CodeGenSpecifics::get( mod.getContext(), fir::getTargetTriple(mod), fir::getKindMapping(mod), - fir::getTargetCPU(mod), fir::getTargetFeatures(mod), *dl); + fir::getTargetCPU(mod), fir::getTargetFeatures(mod), *dl, + fir::getTuneCPU(mod)); setMembers(specifics.get(), &rewriter, &*dl); @@ -672,12 +676,18 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> { auto targetCPU = specifics->getTargetCPU(); mlir::StringAttr targetCPUAttr = targetCPU.empty() ? nullptr : mlir::StringAttr::get(ctx, targetCPU); + auto tuneCPU = specifics->getTuneCPU(); + mlir::StringAttr tuneCPUAttr = + tuneCPU.empty() ? nullptr : mlir::StringAttr::get(ctx, tuneCPU); auto targetFeaturesAttr = specifics->getTargetFeatures(); for (auto fn : mod.getOps<mlir::func::FuncOp>()) { if (targetCPUAttr) fn->setAttr("target_cpu", targetCPUAttr); + if (tuneCPUAttr) + fn->setAttr("tune_cpu", tuneCPUAttr); + if (targetFeaturesAttr) fn->setAttr("target_features", targetFeaturesAttr); diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp index ce86c625e082f..a28b03442fe83 100644 --- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp +++ b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp @@ -35,7 +35,8 @@ LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA, kindMapping(getKindMapping(module)), specifics(CodeGenSpecifics::get( module.getContext(), getTargetTriple(module), getKindMapping(module), - getTargetCPU(module), getTargetFeatures(module), dl)), + getTargetCPU(module), getTargetFeatures(module), dl, + getTuneCPU(module))), tbaaBuilder(std::make_unique<TBAABuilder>(module->getContext(), applyTBAA, forceUnifiedTBAATree)), dataLayout{&dl} { diff --git a/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp b/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp index c4d00875c45e4..1aa631cb39126 100644 --- a/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp +++ b/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp @@ -77,6 +77,24 @@ llvm::StringRef fir::getTargetCPU(mlir::ModuleOp mod) { return {}; } +static constexpr const char *tuneCpuName = "fir.tune_cpu"; + +void fir::setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu) { + if (cpu.empty()) + return; + + auto *ctx = mod.getContext(); + + mod->setAttr(tuneCpuName, mlir::StringAttr::get(ctx, cpu)); +} + +llvm::StringRef fir::getTuneCPU(mlir::ModuleOp mod) { + if (auto attr = mod->getAttrOfType<mlir::StringAttr>(tuneCpuName)) + return attr.getValue(); + + return {}; +} + static constexpr const char *targetFeaturesName = "fir.target_features"; void fir::setTargetFeatures(mlir::ModuleOp mod, llvm::StringRef features) { diff --git a/flang/test/Driver/tune-cpu-fir.f90 b/flang/test/Driver/tune-cpu-fir.f90 new file mode 100644 index 0000000000000..43c13b426d5d9 --- /dev/null +++ b/flang/test/Driver/tune-cpu-fir.f90 @@ -0,0 +1,25 @@ +! RUN: %if aarch64-registered-target %{ %flang_fc1 -emit-fir -triple aarch64-unknown-linux-gnu -target-cpu aarch64 %s -o - | FileCheck %s --check-prefixes=ALL,ARMCPU %} +! RUN: %if aarch64-registered-target %{ %flang_fc1 -emit-fir -triple aarch64-unknown-linux-gnu -tune-cpu neoverse-n1 %s -o - | FileCheck %s --check-prefixes=ALL,ARMTUNE %} +! RUN: %if aarch64-registered-target %{ %flang_fc1 -emit-fir -triple aarch64-unknown-linux-gnu -target-cpu aarch64 -tune-cpu neoverse-n1 %s -o - | FileCheck %s --check-prefixes=ALL,ARMBOTH %} + +! RUN: %if x86-registered-target %{ %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -target-cpu x86-64 %s -o - | FileCheck %s --check-prefixes=ALL,X86CPU %} +! RUN: %if x86-registered-target %{ %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -tune-cpu pentium4 %s -o - | FileCheck %s --check-prefixes=ALL,X86TUNE %} +! RUN: %if x86-registered-target %{ %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -tune-cpu pentium4 %s -o - | FileCheck %s --check-prefixes=ALL,X86BOTH %} + +! ALL: module attributes { + +! ARMCPU-SAME: fir.target_cpu = "aarch64" +! ARMCPU-NOT: fir.tune_cpu = "neoverse-n1" + +! ARMTUNE-SAME: fir.tune_cpu = "neoverse-n1" + +! ARMBOTH-SAME: fir.target_cpu = "aarch64" +! ARMBOTH-SAME: fir.tune_cpu = "neoverse-n1" + +! X86CPU-SAME: fir.target_cpu = "x86-64" +! X86CPU-NOT: fir.tune_cpu = "pentium4" + +! X86TUNE-SAME: fir.tune_cpu = "pentium4" + +! X86BOTH-SAME: fir.target_cpu = "x86-64" +! X86BOTH-SAME: fir.tune_cpu = "pentium4" diff --git a/flang/test/Lower/tune-cpu-llvm.f90 b/flang/test/Lower/tune-cpu-llvm.f90 new file mode 100644 index 0000000000000..dc2a68730cf23 --- /dev/null +++ b/flang/test/Lower/tune-cpu-llvm.f90 @@ -0,0 +1,8 @@ +! RUN: %if x86-registered-target %{ %flang -mtune=pentium4 -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,CHECK-X86 %} +! RUN: %if aarch64-registered-target %{ %flang -mtune=neoverse-n1 -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,CHECK-ARM %} + +!ALL: attributes #{{[0-9]+}} = { +!CHECK-X86-SAME: "tune-cpu"="pentium4" +!CHECK-ARM-SAME: "tune-cpu"="neoverse-n1" +subroutine a +end subroutine a diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index e5e41ad3e9cf2..07eef065daf6f 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -367,11 +367,12 @@ static llvm::LogicalResult convertFortranSourceToMLIR( loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR); loweringOptions.setNSWOnLoopVarInc(setNSW); std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {}; + constexpr const char *tuneCPU = ""; auto burnside = Fortran::lower::LoweringBridge::create( ctx, semanticsContext, defKinds, semanticsContext.intrinsics(), semanticsContext.targetCharacteristics(), parsing.allCooked(), targetTriple, kindMap, loweringOptions, envDefaults, - semanticsContext.languageFeatures(), targetMachine); + semanticsContext.languageFeatures(), targetMachine, tuneCPU); mlir::ModuleOp mlirModule = burnside.getModule(); if (enableOpenMP) { if (enableOpenMPGPU && !enableOpenMPDevice) { diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp index 34ac0e1a5cb98..afaad39ce1268 100644 --- a/flang/tools/tco/tco.cpp +++ b/flang/tools/tco/tco.cpp @@ -58,6 +58,9 @@ static cl::opt<std::string> targetTriple("target", static cl::opt<std::string> targetCPU("target-cpu", cl::desc("specify a target CPU"), cl::init("")); +static cl::opt<std::string> + tuneCPU("tune-cpu", cl::desc("specify a tune CPU"), cl::init("")); + static cl::opt<std::string> targetFeatures("target-features", cl::desc("specify the target features"), cl::init("")); @@ -113,6 +116,7 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) { fir::setTargetTriple(*owningRef, targetTriple); fir::setKindMapping(*owningRef, kindMap); fir::setTargetCPU(*owningRef, targetCPU); + fir::setTuneCPU(*owningRef, tuneCPU); fir::setTargetFeatures(*owningRef, targetFeatures); // tco is a testing tool, so it will happily use the target independent // data layout if none is on the module. diff --git a/flang/unittests/Optimizer/FIRContextTest.cpp b/flang/unittests/Optimizer/FIRContextTest.cpp index 49e1ebf23d8aa..dbc00a3b1339d 100644 --- a/flang/unittests/Optimizer/FIRContextTest.cpp +++ b/flang/unittests/Optimizer/FIRContextTest.cpp @@ -34,6 +34,7 @@ struct StringAttributesTests : public testing::Test { "i10:80,l3:24,a1:8,r54:Double,r62:X86_FP80,r11:PPC_FP128"; std::string target = "powerpc64le-unknown-linux-gnu"; std::string targetCPU = "gfx90a"; + std::string tuneCPU = "generic"; std::string targetFeatures = "+gfx9-insts,+wavefrontsize64"; mlir::ModuleOp mod; }; @@ -42,6 +43,7 @@ TEST_F(StringAttributesTests, moduleStringAttrTest) { setTargetTriple(mod, target); setKindMapping(mod, *kindMap); setTargetCPU(mod, targetCPU); + setTuneCPU(mod, tuneCPU); setTargetFeatures(mod, targetFeatures); auto triple = getTargetTriple(mod); @@ -61,7 +63,8 @@ TEST_F(StringAttributesTests, moduleStringAttrTest) { EXPECT_TRUE(mapStr.find("r62:X86_FP80") != std::string::npos); EXPECT_EQ(getTargetCPU(mod), targetCPU); - + EXPECT_EQ(getTuneCPU(mod), tuneCPU); + auto features = getTargetFeatures(mod); auto featuresList = features.getFeatures(); EXPECT_EQ(features.getFeaturesString(), targetFeatures); diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 65dfcf93d7029..f0dec69a5032a 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1449,6 +1449,7 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [ OptionalAttr<LLVM_VScaleRangeAttr>:$vscale_range, OptionalAttr<FramePointerKindAttr>:$frame_pointer, OptionalAttr<StrAttr>:$target_cpu, + OptionalAttr<StrAttr>:$tune_cpu, OptionalAttr<LLVM_TargetFeaturesAttr>:$target_features, OptionalAttr<BoolAttr>:$unsafe_fp_math, OptionalAttr<BoolAttr>:$no_infs_fp_math, diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index 9915576bbc458..5bc3dd680d02d 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -1688,6 +1688,7 @@ static constexpr std::array kExplicitAttributes{ StringLiteral("noinline"), StringLiteral("optnone"), StringLiteral("target-features"), + StringLiteral("tune-cpu"), StringLiteral("unsafe-fp-math"), StringLiteral("vscale_range"), }; @@ -1804,6 +1805,10 @@ void ModuleImport::processFunctionAttributes(llvm::Function *func, attr.isStringAttribute()) funcOp.setTargetCpuAttr(StringAttr::get(context, attr.getValueAsString())); + if (llvm::Attribute attr = func->getFnAttribute("tune-cpu"); + attr.isStringAttribute()) + funcOp.setTuneCpuAttr(StringAttr::get(context, attr.getValueAsString())); + if (llvm::Attribute attr = func->getFnAttribute("target-features"); attr.isStringAttribute()) funcOp.setTargetFeaturesAttr( diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 1d2e4725d5d63..2735b13a1499f 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -1325,6 +1325,9 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) { if (auto targetCpu = func.getTargetCpu()) llvmFunc->addFnAttr("target-cpu", *targetCpu); + if (auto tuneCpu = func.getTuneCpu()) + llvmFunc->addFnAttr("tune-cpu", *tuneCpu); + if (auto targetFeatures = func.getTargetFeatures()) llvmFunc->addFnAttr("target-features", targetFeatures->getFeaturesString()); diff --git a/mlir/test/Target/LLVMIR/Import/tune-cpu.ll b/mlir/test/Target/LLVMIR/Import/tune-cpu.ll new file mode 100644 index 0000000000000..991a70ada473c --- /dev/null +++ b/mlir/test/Target/LLVMIR/Import/tune-cpu.ll @@ -0,0 +1,16 @@ +; RUN: mlir-translate -import-llvm -split-input-file %s | FileCheck %s + +; CHECK-LABEL: llvm.func @tune_cpu_x86() +; CHECK-SAME: tune_cpu = "pentium4" +define void @tune_cpu_x86() #0 { + ret void +} + +; CHECK-LABEL: llvm.func @tune_cpu_arm() +; CHECK-SAME: tune_cpu = "neoverse-n1" +define void @tune_cpu_arm() #1 { + ret void +} + +attributes #0 = { "tune-cpu"="pentium4" } +attributes #1 = { "tune-cpu"="neoverse-n1" } diff --git a/mlir/test/Target/LLVMIR/tune-cpu.mlir b/mlir/test/Target/LLVMIR/tune-cpu.mlir new file mode 100644 index 0000000000000..c7969f5eb4db0 --- /dev/null +++ b/mlir/test/Target/LLVMIR/tune-cpu.mlir @@ -0,0 +1,14 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// CHECK: define void @tune_cpu_x86() #[[ATTRSX86:.*]] { +// CHECK: define void @tune_cpu_arm() #[[ATTRSARM:.*]] { +// CHECK: attributes #[[ATTRSX86]] = { "tune-cpu"="pentium4" } +// CHECK: attributes #[[ATTRSARM]] = { "tune-cpu"="neoverse-n1" } + +llvm.func @tune_cpu_x86() attributes {tune_cpu = "pentium4"} { + llvm.return +} + +llvm.func @tune_cpu_arm() attributes {tune_cpu = "neoverse-n1"} { + llvm.return +} >From 0a9bf0d5e7a8b8ed0252f0c78c9bd010016f59d5 Mon Sep 17 00:00:00 2001 From: Alexis Perry-Holby <ape...@lanl.gov> Date: Thu, 11 Jul 2024 13:15:13 -0600 Subject: [PATCH 2/2] clang-format --- flang/include/flang/Optimizer/CodeGen/Target.h | 2 +- flang/include/flang/Optimizer/Dialect/Support/FIRContext.h | 2 +- flang/lib/Optimizer/CodeGen/TargetRewrite.cpp | 2 +- flang/tools/tco/tco.cpp | 4 ++-- flang/unittests/Optimizer/FIRContextTest.cpp | 2 +- mlir/lib/Target/LLVMIR/ModuleImport.cpp | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/flang/include/flang/Optimizer/CodeGen/Target.h b/flang/include/flang/Optimizer/CodeGen/Target.h index 2b3b2152ac80c..a7161152a5c32 100644 --- a/flang/include/flang/Optimizer/CodeGen/Target.h +++ b/flang/include/flang/Optimizer/CodeGen/Target.h @@ -181,7 +181,7 @@ class CodeGenSpecifics { llvm::StringRef getTargetCPU() const { return targetCPU; } llvm::StringRef getTuneCPU() const { return tuneCPU; } - + mlir::LLVM::TargetFeaturesAttr getTargetFeatures() const { return targetFeatures; } diff --git a/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h b/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h index b69f1415040ec..bd31aa0782493 100644 --- a/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h +++ b/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h @@ -64,7 +64,7 @@ void setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu); /// Get the tune CPU string from the Module or return a null reference. llvm::StringRef getTuneCPU(mlir::ModuleOp mod); - + /// Set the target features for the module. void setTargetFeatures(mlir::ModuleOp mod, llvm::StringRef features); diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp index b52f2b9325ece..85bf90e475063 100644 --- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp @@ -110,7 +110,7 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> { auto specifics = fir::CodeGenSpecifics::get( mod.getContext(), fir::getTargetTriple(mod), fir::getKindMapping(mod), fir::getTargetCPU(mod), fir::getTargetFeatures(mod), *dl, - fir::getTuneCPU(mod)); + fir::getTuneCPU(mod)); setMembers(specifics.get(), &rewriter, &*dl); diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp index afaad39ce1268..a8c64333109ae 100644 --- a/flang/tools/tco/tco.cpp +++ b/flang/tools/tco/tco.cpp @@ -58,8 +58,8 @@ static cl::opt<std::string> targetTriple("target", static cl::opt<std::string> targetCPU("target-cpu", cl::desc("specify a target CPU"), cl::init("")); -static cl::opt<std::string> - tuneCPU("tune-cpu", cl::desc("specify a tune CPU"), cl::init("")); +static cl::opt<std::string> tuneCPU("tune-cpu", cl::desc("specify a tune CPU"), + cl::init("")); static cl::opt<std::string> targetFeatures("target-features", cl::desc("specify the target features"), diff --git a/flang/unittests/Optimizer/FIRContextTest.cpp b/flang/unittests/Optimizer/FIRContextTest.cpp index dbc00a3b1339d..3f8b59ac94a95 100644 --- a/flang/unittests/Optimizer/FIRContextTest.cpp +++ b/flang/unittests/Optimizer/FIRContextTest.cpp @@ -64,7 +64,7 @@ TEST_F(StringAttributesTests, moduleStringAttrTest) { EXPECT_EQ(getTargetCPU(mod), targetCPU); EXPECT_EQ(getTuneCPU(mod), tuneCPU); - + auto features = getTargetFeatures(mod); auto featuresList = features.getFeatures(); EXPECT_EQ(features.getFeaturesString(), targetFeatures); diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index 5bc3dd680d02d..c4b5fea8ffe8b 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -133,8 +133,8 @@ static LogicalResult convertInstructionImpl(OpBuilder &odsBuilder, if (iface.isConvertibleInstruction(inst->getOpcode())) return iface.convertInstruction(odsBuilder, inst, llvmOperands, moduleImport); - // TODO: Implement the `convertInstruction` hooks in the - // `LLVMDialectLLVMIRImportInterface` and move the following include there. + // TODO: Implement the `convertInstruction` hooks in the + // `LLVMDialectLLVMIRImportInterface` and move the following include there. #include "mlir/Dialect/LLVMIR/LLVMOpFromLLVMIRConversions.inc" return failure(); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits