https://github.com/sebpop updated 
https://github.com/llvm/llvm-project/pull/142686

>From ff88172ea86363bd25f73fc78af1b8fca2cc4f3d Mon Sep 17 00:00:00 2001
From: Sebastian Pop <s...@nvidia.com>
Date: Thu, 22 May 2025 13:50:38 +0000
Subject: [PATCH] add -floop-fuse to clang and flang

---
 clang/include/clang/Basic/CodeGenOptions.def    |  1 +
 clang/include/clang/Driver/Options.td           |  4 ++++
 clang/lib/CodeGen/BackendUtil.cpp               |  2 ++
 clang/lib/Driver/ToolChains/Clang.cpp           |  1 +
 clang/lib/Driver/ToolChains/CommonArgs.cpp      |  2 +-
 clang/lib/Driver/ToolChains/Flang.cpp           |  2 ++
 clang/lib/Frontend/CompilerInvocation.cpp       |  6 ++++++
 clang/test/Driver/clang_f_opts.c                |  7 +++++++
 flang/docs/ReleaseNotes.md                      |  1 +
 flang/include/flang/Frontend/CodeGenOptions.def |  1 +
 flang/lib/Frontend/CompilerInvocation.cpp       |  3 +++
 flang/lib/Frontend/FrontendActions.cpp          |  1 +
 flang/test/Driver/loop-fuse.f90                 | 17 +++++++++++++++++
 llvm/include/llvm/Passes/PassBuilder.h          |  3 +++
 llvm/lib/Passes/PassBuilderPipelines.cpp        | 13 ++++++++++++-
 15 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 flang/test/Driver/loop-fuse.f90

diff --git a/clang/include/clang/Basic/CodeGenOptions.def 
b/clang/include/clang/Basic/CodeGenOptions.def
index aad4e107cbeb3..de40564bc280f 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -326,6 +326,7 @@ CODEGENOPT(TimeTrace         , 1, 0) ///< Set when 
-ftime-trace is enabled.
 VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500) ///< Minimum time granularity 
(in microseconds),
                                                ///< traced by time profiler
 CODEGENOPT(InterchangeLoops  , 1, 0) ///< Run loop-interchange.
+CODEGENOPT(FuseLoops         , 1, 0) ///< Run loop-fuse.
 CODEGENOPT(UnrollLoops       , 1, 0) ///< Control whether loops are unrolled.
 CODEGENOPT(RerollLoops       , 1, 0) ///< Control whether loops are rerolled.
 CODEGENOPT(NoUseJumpTables   , 1, 0) ///< Set when -fno-jump-tables is enabled.
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 5ca31c253ed8f..e5e2c0035e03f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4189,6 +4189,10 @@ def floop_interchange : Flag<["-"], 
"floop-interchange">, Group<f_Group>,
   HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, 
CC1Option, FlangOption, FC1Option]>;
 def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>,
   HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, 
CC1Option, FlangOption, FC1Option]>;
+def floop_fuse : Flag<["-"], "floop-fuse">, Group<f_Group>,
+  HelpText<"Enable the loop fuse pass">, Visibility<[ClangOption, CC1Option, 
FlangOption, FC1Option]>;
+def fno_loop_fuse: Flag<["-"], "fno-loop-fuse">, Group<f_Group>,
+  HelpText<"Disable the loop fuse pass">, Visibility<[ClangOption, CC1Option, 
FlangOption, FC1Option]>;
 def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
   HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, 
FlangOption, FC1Option]>;
 def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
diff --git a/clang/lib/CodeGen/BackendUtil.cpp 
b/clang/lib/CodeGen/BackendUtil.cpp
index cd5fc48c4a22b..539e413e8c86e 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -898,6 +898,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   PipelineTuningOptions PTO;
   PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;
   PTO.LoopInterchange = CodeGenOpts.InterchangeLoops;
+  PTO.LoopFuse = CodeGenOpts.FuseLoops;
   // For historical reasons, loop interleaving is set to mirror setting for 
loop
   // unrolling.
   PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
@@ -1339,6 +1340,7 @@ runThinLTOBackend(CompilerInstance &CI, 
ModuleSummaryIndex *CombinedIndex,
   Conf.SampleProfile = std::move(SampleProfile);
   Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
   Conf.PTO.LoopInterchange = CGOpts.InterchangeLoops;
+  Conf.PTO.LoopFuse = CGOpts.FuseLoops;
   // For historical reasons, loop interleaving is set to mirror setting for 
loop
   // unrolling.
   Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 13842b8cc2870..f3713e12cb5ef 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7030,6 +7030,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA,
                   options::OPT_fno_unroll_loops);
   Args.AddLastArg(CmdArgs, options::OPT_floop_interchange,
                   options::OPT_fno_loop_interchange);
+  Args.AddLastArg(CmdArgs, options::OPT_floop_fuse, 
options::OPT_fno_loop_fuse);
 
   Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ);
 
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp 
b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 937ee09cac7cc..4217b48a56b05 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -3157,7 +3157,7 @@ void tools::handleVectorizeSLPArgs(const ArgList &Args,
 
 void tools::handleInterchangeLoopsArgs(const ArgList &Args,
                                        ArgStringList &CmdArgs) {
-  // FIXME: instead of relying on shouldEnableVectorizerAtOLevel, we may want 
to
+  // FIXME: Instead of relying on shouldEnableVectorizerAtOLevel, we may want 
to
   // implement a separate function to infer loop interchange from opt level.
   // For now, enable loop-interchange at the same opt levels as loop-vectorize.
   bool EnableInterchange = shouldEnableVectorizerAtOLevel(Args, false);
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index dcc46469df3e9..f34a58cab3ff3 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -151,6 +151,8 @@ void Flang::addCodegenOptions(const ArgList &Args,
       !stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
     CmdArgs.push_back("-fstack-arrays");
 
+  Args.AddLastArg(CmdArgs, options::OPT_floop_fuse, 
options::OPT_fno_loop_fuse);
+
   handleInterchangeLoopsArgs(Args, CmdArgs);
   handleVectorizeLoopsArgs(Args, CmdArgs);
   handleVectorizeSLPArgs(Args, CmdArgs);
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp 
b/clang/lib/Frontend/CompilerInvocation.cpp
index 2c02719121c73..582795956485e 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1648,6 +1648,11 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const 
CodeGenOptions &Opts,
   else
     GenerateArg(Consumer, OPT_fno_loop_interchange);
 
+  if (Opts.FuseLoops)
+    GenerateArg(Consumer, OPT_floop_fuse);
+  else
+    GenerateArg(Consumer, OPT_fno_loop_fuse);
+
   if (!Opts.BinutilsVersion.empty())
     GenerateArg(Consumer, OPT_fbinutils_version_EQ, Opts.BinutilsVersion);
 
@@ -1963,6 +1968,7 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions 
&Opts, ArgList &Args,
                    (Opts.OptimizationLevel > 1));
   Opts.InterchangeLoops =
       Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false);
+  Opts.FuseLoops = Args.hasFlag(OPT_floop_fuse, OPT_fno_loop_fuse, false);
   Opts.BinutilsVersion =
       std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ));
 
diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
index ee7ded265769b..204d5e338190f 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -52,6 +52,13 @@
 // CHECK-INTERCHANGE-LOOPS: "-floop-interchange"
 // CHECK-NO-INTERCHANGE-LOOPS: "-fno-loop-interchange"
 
+// RUN: %clang -### -S -floop-fuse %s 2>&1 | FileCheck 
-check-prefix=CHECK-FUSE-LOOPS %s
+// RUN: %clang -### -S -fno-loop-fuse %s 2>&1 | FileCheck 
-check-prefix=CHECK-NO-FUSE-LOOPS %s
+// RUN: %clang -### -S -fno-loop-fuse -floop-fuse %s 2>&1 | FileCheck 
-check-prefix=CHECK-FUSE-LOOPS %s
+// RUN: %clang -### -S -floop-fuse -fno-loop-fuse %s 2>&1 | FileCheck 
-check-prefix=CHECK-NO-FUSE-LOOPS %s
+// CHECK-FUSE-LOOPS: "-floop-fuse"
+// CHECK-NO-FUSE-LOOPS: "-fno-loop-fuse"
+
 // RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck 
-check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s
 // CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate"
 
diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md
index 36be369595ffd..33795b7d1a55a 100644
--- a/flang/docs/ReleaseNotes.md
+++ b/flang/docs/ReleaseNotes.md
@@ -34,6 +34,7 @@ page](https://llvm.org/releases/).
 
 * -floop-interchange is now recognized by flang.
 * -floop-interchange is enabled by default at -O2 and above.
+* -floop-fuse is now recognized by flang.
 
 ## Windows Support
 
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def 
b/flang/include/flang/Frontend/CodeGenOptions.def
index a697872836569..85bec620b54ba 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -36,6 +36,7 @@ CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the 
stack-arrays pass)
 CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
 CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization.
 CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange.
+CODEGENOPT(FuseLoops, 1, 0)        ///< Enable loop fuse.
 CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
 CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
 CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 90a002929eff0..c2b63819e9f77 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -273,6 +273,9 @@ static void 
parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
   if (args.getLastArg(clang::driver::options::OPT_floop_interchange))
     opts.InterchangeLoops = 1;
 
+  if (args.getLastArg(clang::driver::options::OPT_floop_fuse))
+    opts.FuseLoops = 1;
+
   if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
     opts.VectorizeLoop = 1;
 
diff --git a/flang/lib/Frontend/FrontendActions.cpp 
b/flang/lib/Frontend/FrontendActions.cpp
index 012d0fdfe645f..dc56cbe5c85bc 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -925,6 +925,7 @@ void 
CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
     si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
   pto.LoopUnrolling = opts.UnrollLoops;
   pto.LoopInterchange = opts.InterchangeLoops;
+  pto.LoopFuse = opts.FuseLoops;
   pto.LoopInterleaving = opts.UnrollLoops;
   pto.LoopVectorization = opts.VectorizeLoop;
   pto.SLPVectorization = opts.VectorizeSLP;
diff --git a/flang/test/Driver/loop-fuse.f90 b/flang/test/Driver/loop-fuse.f90
new file mode 100644
index 0000000000000..240d00fdb62d7
--- /dev/null
+++ b/flang/test/Driver/loop-fuse.f90
@@ -0,0 +1,17 @@
+! RUN: %flang -### -S -floop-fuse %s 2>&1 | FileCheck 
-check-prefix=CHECK-LOOP-FUSE %s
+! RUN: %flang -### -S -fno-loop-fuse %s 2>&1 | FileCheck 
-check-prefix=CHECK-NO-LOOP-FUSE %s
+! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE 
%s
+! CHECK-LOOP-FUSE: "-floop-fuse"
+! CHECK-NO-LOOP-FUSE-NOT: "-floop-fuse"
+! RUN: %flang_fc1 -emit-llvm -O2 -floop-fuse -mllvm -print-pipeline-passes -o 
/dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE-PASS %s
+! RUN: %flang_fc1 -emit-llvm -O2 -fno-loop-fuse -mllvm -print-pipeline-passes 
-o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE-PASS %s
+! CHECK-LOOP-FUSE-PASS: loop-fusion
+! CHECK-NO-LOOP-FUSE-PASS-NOT: loop-fusion
+
+program test
+end program
diff --git a/llvm/include/llvm/Passes/PassBuilder.h 
b/llvm/include/llvm/Passes/PassBuilder.h
index 51ccaa53447d7..287a57c7d4a7d 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -64,6 +64,9 @@ class PipelineTuningOptions {
   /// false.
   bool LoopInterchange;
 
+  /// Tuning option to enable/disable loop fuse. Its default value is false.
+  bool LoopFuse;
+
   /// Tuning option to forget all SCEV loops in LoopUnroll. Its default value
   /// is that of the flag: `-forget-scev-loop-unroll`.
   bool ForgetAllSCEVInLoopUnroll;
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp
index f3654600c5abb..aaf43a9b535e9 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -104,6 +104,7 @@
 #include "llvm/Transforms/Scalar/LoopDeletion.h"
 #include "llvm/Transforms/Scalar/LoopDistribute.h"
 #include "llvm/Transforms/Scalar/LoopFlatten.h"
+#include "llvm/Transforms/Scalar/LoopFuse.h"
 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
@@ -205,6 +206,10 @@ static cl::opt<bool>
     EnableLoopInterchange("enable-loopinterchange", cl::init(false), 
cl::Hidden,
                           cl::desc("Enable the LoopInterchange Pass"));
 
+static cl::opt<bool> EnableLoopFuse("enable-loopfuse", cl::init(false),
+                                    cl::Hidden,
+                                    cl::desc("Enable the LoopFuse Pass"));
+
 static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
                                         cl::init(false), cl::Hidden,
                                         cl::desc("Enable Unroll And Jam 
Pass"));
@@ -314,6 +319,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
   SLPVectorization = false;
   LoopUnrolling = true;
   LoopInterchange = EnableLoopInterchange;
+  LoopFuse = EnableLoopFuse;
   ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
   LicmMssaOptCap = SetLicmMssaOptCap;
   LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
@@ -518,6 +524,9 @@ 
PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
 
   invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
 
+  if (PTO.LoopFuse)
+    FPM.addPass(LoopFusePass());
+
   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
                                               /*UseMemorySSA=*/true,
                                               /*UseBlockFrequencyInfo=*/true));
@@ -709,6 +718,9 @@ 
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
 
   invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
 
+  if (PTO.LoopFuse)
+    FPM.addPass(LoopFusePass());
+
   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
                                               /*UseMemorySSA=*/true,
                                               /*UseBlockFrequencyInfo=*/true));
@@ -2112,7 +2124,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel 
Level,
     LPM.addPass(LoopFlattenPass());
   LPM.addPass(IndVarSimplifyPass());
   LPM.addPass(LoopDeletionPass());
-  // FIXME: Add loop interchange.
 
   // Unroll small loops and perform peeling.
   LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to