llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-driver Author: Sebastian Pop (sebpop) <details> <summary>Changes</summary> This adds the flag -floop-fuse to the clang and flang drivers. --- Full diff: https://github.com/llvm/llvm-project/pull/142686.diff 15 Files Affected: - (modified) clang/include/clang/Basic/CodeGenOptions.def (+1) - (modified) clang/include/clang/Driver/Options.td (+4) - (modified) clang/lib/CodeGen/BackendUtil.cpp (+2) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+1) - (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1) - (modified) clang/lib/Driver/ToolChains/Flang.cpp (+2) - (modified) clang/lib/Frontend/CompilerInvocation.cpp (+7) - (modified) clang/test/Driver/clang_f_opts.c (+7) - (modified) flang/docs/ReleaseNotes.md (+1) - (modified) flang/include/flang/Frontend/CodeGenOptions.def (+1) - (modified) flang/lib/Frontend/CompilerInvocation.cpp (+3) - (modified) flang/lib/Frontend/FrontendActions.cpp (+1) - (added) flang/test/Driver/loop-fuse.f90 (+17) - (modified) llvm/include/llvm/Passes/PassBuilder.h (+3) - (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+12-1) ``````````diff diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index aad4e107cbeb3..de40564bc280f 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -326,6 +326,7 @@ CODEGENOPT(TimeTrace , 1, 0) ///< Set when -ftime-trace is enabled. VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500) ///< Minimum time granularity (in microseconds), ///< traced by time profiler CODEGENOPT(InterchangeLoops , 1, 0) ///< Run loop-interchange. +CODEGENOPT(FuseLoops , 1, 0) ///< Run loop-fuse. CODEGENOPT(UnrollLoops , 1, 0) ///< Control whether loops are unrolled. CODEGENOPT(RerollLoops , 1, 0) ///< Control whether loops are rerolled. CODEGENOPT(NoUseJumpTables , 1, 0) ///< Set when -fno-jump-tables is enabled. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5ca31c253ed8f..e5e2c0035e03f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4189,6 +4189,10 @@ def floop_interchange : Flag<["-"], "floop-interchange">, Group<f_Group>, HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>, HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; +def floop_fuse : Flag<["-"], "floop-fuse">, Group<f_Group>, + HelpText<"Enable the loop fuse pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; +def fno_loop_fuse: Flag<["-"], "fno-loop-fuse">, Group<f_Group>, + HelpText<"Disable the loop fuse pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>, HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index cd5fc48c4a22b..539e413e8c86e 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -898,6 +898,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( PipelineTuningOptions PTO; PTO.LoopUnrolling = CodeGenOpts.UnrollLoops; PTO.LoopInterchange = CodeGenOpts.InterchangeLoops; + PTO.LoopFuse = CodeGenOpts.FuseLoops; // For historical reasons, loop interleaving is set to mirror setting for loop // unrolling. PTO.LoopInterleaving = CodeGenOpts.UnrollLoops; @@ -1339,6 +1340,7 @@ runThinLTOBackend(CompilerInstance &CI, ModuleSummaryIndex *CombinedIndex, Conf.SampleProfile = std::move(SampleProfile); Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops; Conf.PTO.LoopInterchange = CGOpts.InterchangeLoops; + Conf.PTO.LoopFuse = CGOpts.FuseLoops; // For historical reasons, loop interleaving is set to mirror setting for loop // unrolling. Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 13842b8cc2870..f3713e12cb5ef 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7030,6 +7030,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_unroll_loops); Args.AddLastArg(CmdArgs, options::OPT_floop_interchange, options::OPT_fno_loop_interchange); + Args.AddLastArg(CmdArgs, options::OPT_floop_fuse, options::OPT_fno_loop_fuse); Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 937ee09cac7cc..4217b48a56b05 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -3157,7 +3157,7 @@ void tools::handleVectorizeSLPArgs(const ArgList &Args, void tools::handleInterchangeLoopsArgs(const ArgList &Args, ArgStringList &CmdArgs) { - // FIXME: instead of relying on shouldEnableVectorizerAtOLevel, we may want to + // FIXME: Instead of relying on shouldEnableVectorizerAtOLevel, we may want to // implement a separate function to infer loop interchange from opt level. // For now, enable loop-interchange at the same opt levels as loop-vectorize. bool EnableInterchange = shouldEnableVectorizerAtOLevel(Args, false); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index dcc46469df3e9..f34a58cab3ff3 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -151,6 +151,8 @@ void Flang::addCodegenOptions(const ArgList &Args, !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + Args.AddLastArg(CmdArgs, options::OPT_floop_fuse, options::OPT_fno_loop_fuse); + handleInterchangeLoopsArgs(Args, CmdArgs); handleVectorizeLoopsArgs(Args, CmdArgs); handleVectorizeSLPArgs(Args, CmdArgs); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 2c02719121c73..5506e11f51491 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1648,6 +1648,11 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts, else GenerateArg(Consumer, OPT_fno_loop_interchange); + if (Opts.FuseLoops) + GenerateArg(Consumer, OPT_floop_fuse); + else + GenerateArg(Consumer, OPT_fno_loop_fuse); + if (!Opts.BinutilsVersion.empty()) GenerateArg(Consumer, OPT_fbinutils_version_EQ, Opts.BinutilsVersion); @@ -1963,6 +1968,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, (Opts.OptimizationLevel > 1)); Opts.InterchangeLoops = Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false); + Opts.FuseLoops = + Args.hasFlag(OPT_floop_fuse, OPT_fno_loop_fuse, false); Opts.BinutilsVersion = std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ)); diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index ee7ded265769b..204d5e338190f 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -52,6 +52,13 @@ // CHECK-INTERCHANGE-LOOPS: "-floop-interchange" // CHECK-NO-INTERCHANGE-LOOPS: "-fno-loop-interchange" +// RUN: %clang -### -S -floop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-FUSE-LOOPS %s +// RUN: %clang -### -S -fno-loop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FUSE-LOOPS %s +// RUN: %clang -### -S -fno-loop-fuse -floop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-FUSE-LOOPS %s +// RUN: %clang -### -S -floop-fuse -fno-loop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FUSE-LOOPS %s +// CHECK-FUSE-LOOPS: "-floop-fuse" +// CHECK-NO-FUSE-LOOPS: "-fno-loop-fuse" + // RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s // CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate" diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md index 36be369595ffd..33795b7d1a55a 100644 --- a/flang/docs/ReleaseNotes.md +++ b/flang/docs/ReleaseNotes.md @@ -34,6 +34,7 @@ page](https://llvm.org/releases/). * -floop-interchange is now recognized by flang. * -floop-interchange is enabled by default at -O2 and above. +* -floop-fuse is now recognized by flang. ## Windows Support diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index a697872836569..85bec620b54ba 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -36,6 +36,7 @@ CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization. CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization. CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange. +CODEGENOPT(FuseLoops, 1, 0) ///< Enable loop fuse. CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning. CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 90a002929eff0..c2b63819e9f77 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -273,6 +273,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, if (args.getLastArg(clang::driver::options::OPT_floop_interchange)) opts.InterchangeLoops = 1; + if (args.getLastArg(clang::driver::options::OPT_floop_fuse)) + opts.FuseLoops = 1; + if (args.getLastArg(clang::driver::options::OPT_vectorize_loops)) opts.VectorizeLoop = 1; diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 012d0fdfe645f..dc56cbe5c85bc 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -925,6 +925,7 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) { si.getTimePasses().setOutStream(ci.getTimingStreamLLVM()); pto.LoopUnrolling = opts.UnrollLoops; pto.LoopInterchange = opts.InterchangeLoops; + pto.LoopFuse = opts.FuseLoops; pto.LoopInterleaving = opts.UnrollLoops; pto.LoopVectorization = opts.VectorizeLoop; pto.SLPVectorization = opts.VectorizeSLP; diff --git a/flang/test/Driver/loop-fuse.f90 b/flang/test/Driver/loop-fuse.f90 new file mode 100644 index 0000000000000..240d00fdb62d7 --- /dev/null +++ b/flang/test/Driver/loop-fuse.f90 @@ -0,0 +1,17 @@ +! RUN: %flang -### -S -floop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE %s +! RUN: %flang -### -S -fno-loop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s +! CHECK-LOOP-FUSE: "-floop-fuse" +! CHECK-NO-LOOP-FUSE-NOT: "-floop-fuse" +! RUN: %flang_fc1 -emit-llvm -O2 -floop-fuse -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE-PASS %s +! RUN: %flang_fc1 -emit-llvm -O2 -fno-loop-fuse -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE-PASS %s +! CHECK-LOOP-FUSE-PASS: loop-fusion +! CHECK-NO-LOOP-FUSE-PASS-NOT: loop-fusion + +program test +end program diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 51ccaa53447d7..287a57c7d4a7d 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -64,6 +64,9 @@ class PipelineTuningOptions { /// false. bool LoopInterchange; + /// Tuning option to enable/disable loop fuse. Its default value is false. + bool LoopFuse; + /// Tuning option to forget all SCEV loops in LoopUnroll. Its default value /// is that of the flag: `-forget-scev-loop-unroll`. bool ForgetAllSCEVInLoopUnroll; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index f3654600c5abb..aaf43a9b535e9 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -104,6 +104,7 @@ #include "llvm/Transforms/Scalar/LoopDeletion.h" #include "llvm/Transforms/Scalar/LoopDistribute.h" #include "llvm/Transforms/Scalar/LoopFlatten.h" +#include "llvm/Transforms/Scalar/LoopFuse.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopInterchange.h" @@ -205,6 +206,10 @@ static cl::opt<bool> EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass")); +static cl::opt<bool> EnableLoopFuse("enable-loopfuse", cl::init(false), + cl::Hidden, + cl::desc("Enable the LoopFuse Pass")); + static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass")); @@ -314,6 +319,7 @@ PipelineTuningOptions::PipelineTuningOptions() { SLPVectorization = false; LoopUnrolling = true; LoopInterchange = EnableLoopInterchange; + LoopFuse = EnableLoopFuse; ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; LicmMssaOptCap = SetLicmMssaOptCap; LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; @@ -518,6 +524,9 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, invokeLoopOptimizerEndEPCallbacks(LPM2, Level); + if (PTO.LoopFuse) + FPM.addPass(LoopFusePass()); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); @@ -709,6 +718,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, invokeLoopOptimizerEndEPCallbacks(LPM2, Level); + if (PTO.LoopFuse) + FPM.addPass(LoopFusePass()); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); @@ -2112,7 +2124,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, LPM.addPass(LoopFlattenPass()); LPM.addPass(IndVarSimplifyPass()); LPM.addPass(LoopDeletionPass()); - // FIXME: Add loop interchange. // Unroll small loops and perform peeling. LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), `````````` </details> https://github.com/llvm/llvm-project/pull/142686 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits