https://github.com/DavidTruby updated https://github.com/llvm/llvm-project/pull/122906
>From c9b2e5855fdbbaafb5512e1e2539983201202b25 Mon Sep 17 00:00:00 2001 From: David Truby <david.tr...@arm.com> Date: Wed, 8 Jan 2025 11:19:38 +0000 Subject: [PATCH 1/5] [flang] Add -f[no-]unroll-loops flag This patch adds support for the -funroll-loops and -fno-unroll-loops flags with similar behaviour to clang. funroll-loops is enabled at -O2 onwards as in clang. --- clang/include/clang/Driver/Options.td | 4 +- clang/lib/Driver/ToolChains/Flang.cpp | 7 ++- .../include/flang/Frontend/CodeGenOptions.def | 1 + flang/lib/Frontend/CompilerInvocation.cpp | 4 ++ flang/lib/Frontend/FrontendActions.cpp | 2 + flang/test/HLFIR/unroll-loops.fir | 43 +++++++++++++++++++ 6 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 flang/test/HLFIR/unroll-loops.fir diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2721c1b5d8dc55..4bab2ae4d8dd5c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4157,9 +4157,9 @@ def ftrap_function_EQ : Joined<["-"], "ftrap-function=">, Group<f_Group>, HelpText<"Issue call to specified function rather than a trap instruction">, MarshallingInfoString<CodeGenOpts<"TrapFuncName">>; def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>, - HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option]>; + HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>, - HelpText<"Turn off loop unroller">, Visibility<[ClangOption, CC1Option]>; + HelpText<"Turn off loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def ffinite_loops: Flag<["-"], "ffinite-loops">, Group<f_Group>, HelpText<"Assume all non-trivial loops are finite.">, Visibility<[ClangOption, CC1Option]>; def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group<f_Group>, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index a7d0cc99f27d2d..282a4e267b3dfc 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -150,12 +150,17 @@ void Flang::addCodegenOptions(const ArgList &Args, if (shouldLoopVersion(Args)) CmdArgs.push_back("-fversion-loops-for-stride"); + Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir, + options::OPT_flang_deprecated_no_hlfir, + options::OPT_fno_ppc_native_vec_elem_order, + options::OPT_fppc_native_vec_elem_order}); Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir, options::OPT_flang_deprecated_no_hlfir, options::OPT_fno_ppc_native_vec_elem_order, options::OPT_fppc_native_vec_elem_order, - options::OPT_ftime_report, options::OPT_ftime_report_EQ}); + options::OPT_ftime_report, options::OPT_ftime_report_EQ, + options::OPT_funroll_loops, options::OPT_fno_unroll_loops}); } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index 9d03ec88a56b8a..deb8d1aede518b 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -32,6 +32,7 @@ CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the ///< compile step. CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning. +CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass CODEGENOPT(Underscoring, 1, 1) diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 5e7127313c1335..15b1e1e0a24881 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -246,6 +246,10 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, clang::driver::options::OPT_fno_loop_versioning, false)) opts.LoopVersioning = 1; + opts.UnrollLoops = args.hasFlag(clang::driver::options::OPT_funroll_loops, + clang::driver::options::OPT_fno_unroll_loops, + (opts.OptimizationLevel > 1)); + opts.AliasAnalysis = opts.OptimizationLevel > 0; // -mframe-pointer=none/non-leaf/all option. diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 52a18d59c7cda5..b0545a7ac2f99a 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -1028,6 +1028,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) { si.registerCallbacks(pic, &mam); if (ci.isTimingEnabled()) si.getTimePasses().setOutStream(ci.getTimingStreamLLVM()); + pto.LoopUnrolling = opts.UnrollLoops; + pto.LoopInterleaving = opts.UnrollLoops; llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic); // Attempt to load pass plugins and register their callbacks with PB. diff --git a/flang/test/HLFIR/unroll-loops.fir b/flang/test/HLFIR/unroll-loops.fir new file mode 100644 index 00000000000000..f645132262f8d6 --- /dev/null +++ b/flang/test/HLFIR/unroll-loops.fir @@ -0,0 +1,43 @@ +// RUN: %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +// RUN: %flang_fc1 -emit-llvm -O2 -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +// RUN: %flang_fc1 -emit-llvm -O1 -fno-unroll-loops -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +// RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL + +// CHECK-LABEL: @unroll +// CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]]) +func.func @unroll(%arg0: !fir.ref<!fir.array<1000xf64>> {fir.bindc_name = "a"}) { + // CHECK: %[[GEPIV:.*]] = getelementptr i8, ptr %0, i64 -8 + %scope = fir.dummy_scope : !fir.dscope + %c1000 = arith.constant 1000 : index + %shape = fir.shape %c1000 : (index) -> !fir.shape<1> + %a:2 = hlfir.declare %arg0(%shape) dummy_scope %scope {uniq_name = "unrollEa"} : (!fir.ref<!fir.array<1000xf64>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<1000xf64>>, !fir.ref<!fir.array<1000xf64>>) + %c1 = arith.constant 1 : index + fir.do_loop %arg1 = %c1 to %c1000 step %c1 { + // CHECK: [[BLK:.*]]: + + // NO-UNROLL-NEXT: %[[PHI:.*]] = phi i64 [ 1, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ] + // NO-UNROLL-NEXT: %[[IV_D:.*]] = uitofp nneg i64 %[[PHI]] to double + // NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr double, ptr %[[GEPIV]], i64 %[[PHI]] + // NO-UNROLL-NEXT: store double %[[IV_D]], ptr %[[GEP]] + // NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw nsw i64 %{{.*}}, 1 + // NO-UNROLL-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1001 + // NO-UNROLL-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]] + + // UNROLL-NEXT: %[[PHI:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ] + // UNROLL-NEXT: %[[IV0:.*]] = or disjoint i64 %[[PHI]], 1 + // UNROLL-NEXT: %[[IV1:.*]] = add i64 %[[PHI]], 2 + // UNROLL-NEXT: %[[IV0_D:.*]] = uitofp nneg i64 %[[IV0]] to double + // UNROLL-NEXT: %[[IV1_D:.*]] = uitofp nneg i64 %[[IV1]] to double + // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[PHI]] + // UNROLL-NEXT: %[[GEP1:.*]] = getelementptr double, ptr %[[GEPIV]], i64 %[[IV1]] + // UNROLL-NEXT: store double %[[IV0_D]], ptr %[[GEP0]] + // UNROLL-NEXT: store double %[[IV1_D]], ptr %[[GEP1]] + // UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[PHI]], 2 + // UNROLL-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000 + // UNROLL-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]] + %iv = fir.convert %arg1 : (index) -> f64 + %ai = hlfir.designate %a#0 (%arg1) : (!fir.ref<!fir.array<1000xf64>>, index) -> !fir.ref<f64> + hlfir.assign %iv to %ai : f64, !fir.ref<f64> + } + return +} >From d66a7c612beed3e4f2809d8beba5648cdeea7709 Mon Sep 17 00:00:00 2001 From: David Truby <david.tr...@arm.com> Date: Tue, 14 Jan 2025 15:26:42 +0000 Subject: [PATCH 2/5] Fix bad rebase and add compiler->frontend forwarding test --- clang/lib/Driver/ToolChains/Flang.cpp | 4 ---- flang/test/Driver/funroll-loops.f90 | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 flang/test/Driver/funroll-loops.f90 diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 282a4e267b3dfc..86ed25badfa2b7 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -150,10 +150,6 @@ void Flang::addCodegenOptions(const ArgList &Args, if (shouldLoopVersion(Args)) CmdArgs.push_back("-fversion-loops-for-stride"); - Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir, - options::OPT_flang_deprecated_no_hlfir, - options::OPT_fno_ppc_native_vec_elem_order, - options::OPT_fppc_native_vec_elem_order}); Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir, options::OPT_flang_deprecated_no_hlfir, diff --git a/flang/test/Driver/funroll-loops.f90 b/flang/test/Driver/funroll-loops.f90 new file mode 100644 index 00000000000000..5c1a07e7d5d12e --- /dev/null +++ b/flang/test/Driver/funroll-loops.f90 @@ -0,0 +1,5 @@ +! RUN: %flang -### -funroll-loops %s 2>&1 | FileCheck %s -check-prefix UNROLL +! RUN: %flang -### -fno-unroll-loops %s 2>&1 | FileCheck %s -check-prefix NO-UNROLL + +! UNROLL: "-funroll-loops" +! NO-UNROLL: "-fno-unroll-loops" >From ac03aad135cb8ad3ee2e40e2574284e0d17802b7 Mon Sep 17 00:00:00 2001 From: David Truby <david.tr...@arm.com> Date: Tue, 14 Jan 2025 16:08:45 +0000 Subject: [PATCH 3/5] Fix test on x86 --- flang/test/HLFIR/unroll-loops.fir | 50 +++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/flang/test/HLFIR/unroll-loops.fir b/flang/test/HLFIR/unroll-loops.fir index f645132262f8d6..e032cff548b8de 100644 --- a/flang/test/HLFIR/unroll-loops.fir +++ b/flang/test/HLFIR/unroll-loops.fir @@ -1,40 +1,40 @@ -// RUN: %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL -// RUN: %flang_fc1 -emit-llvm -O2 -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL -// RUN: %flang_fc1 -emit-llvm -O1 -fno-unroll-loops -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL -// RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +// RUN: %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +// RUN: %flang_fc1 -emit-llvm -O2 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +// RUN: %flang_fc1 -emit-llvm -O1 -fno-unroll-loops -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +// RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL // CHECK-LABEL: @unroll // CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]]) func.func @unroll(%arg0: !fir.ref<!fir.array<1000xf64>> {fir.bindc_name = "a"}) { - // CHECK: %[[GEPIV:.*]] = getelementptr i8, ptr %0, i64 -8 %scope = fir.dummy_scope : !fir.dscope %c1000 = arith.constant 1000 : index %shape = fir.shape %c1000 : (index) -> !fir.shape<1> %a:2 = hlfir.declare %arg0(%shape) dummy_scope %scope {uniq_name = "unrollEa"} : (!fir.ref<!fir.array<1000xf64>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<1000xf64>>, !fir.ref<!fir.array<1000xf64>>) %c1 = arith.constant 1 : index fir.do_loop %arg1 = %c1 to %c1000 step %c1 { - // CHECK: [[BLK:.*]]: + // CHECK: br label %[[BLK:.*]] + // CHECK: [[BLK]]: + // CHECK-NEXT: %[[IND:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ] + // CHECK-NEXT: %[[VIND:.*]] = phi <2 x i64> [ <i64 1, i64 2>, %{{.*}} ], [ %[[NVIND:.*]], %[[BLK]] ] - // NO-UNROLL-NEXT: %[[PHI:.*]] = phi i64 [ 1, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ] - // NO-UNROLL-NEXT: %[[IV_D:.*]] = uitofp nneg i64 %[[PHI]] to double - // NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr double, ptr %[[GEPIV]], i64 %[[PHI]] - // NO-UNROLL-NEXT: store double %[[IV_D]], ptr %[[GEP]] - // NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw nsw i64 %{{.*}}, 1 - // NO-UNROLL-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1001 - // NO-UNROLL-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]] + // NO-UNROLL-NEXT: %[[IV_D:.*]] = uitofp nneg <2 x i64> %[[VIND]] to <2 x double> + // NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[IND]] + // NO-UNROLL-NEXT: store <2 x double> %[[IV_D]], ptr %[[GEP]] + // NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2 + // NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2) - // UNROLL-NEXT: %[[PHI:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ] - // UNROLL-NEXT: %[[IV0:.*]] = or disjoint i64 %[[PHI]], 1 - // UNROLL-NEXT: %[[IV1:.*]] = add i64 %[[PHI]], 2 - // UNROLL-NEXT: %[[IV0_D:.*]] = uitofp nneg i64 %[[IV0]] to double - // UNROLL-NEXT: %[[IV1_D:.*]] = uitofp nneg i64 %[[IV1]] to double - // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[PHI]] - // UNROLL-NEXT: %[[GEP1:.*]] = getelementptr double, ptr %[[GEPIV]], i64 %[[IV1]] - // UNROLL-NEXT: store double %[[IV0_D]], ptr %[[GEP0]] - // UNROLL-NEXT: store double %[[IV1_D]], ptr %[[GEP1]] - // UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[PHI]], 2 - // UNROLL-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000 - // UNROLL-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]] + // UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2) + // UNROLL-NEXT: %[[IV0_D:.*]] = uitofp nneg <2 x i64> %[[VIND]] to <2 x double> + // UNROLL-NEXT: %[[IV1_D:.*]] = uitofp nneg <2 x i64> %[[VIND1]] to <2 x double> + // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[IND]] + // UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16 + // UNROLL-NEXT: store <2 x double> %[[IV0_D]], ptr %[[GEP0]] + // UNROLL-NEXT: store <2 x double> %[[IV1_D]], ptr %[[GEP1]] + // UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4 + // UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4) + + // CHECK-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000 + // CHECK-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]] %iv = fir.convert %arg1 : (index) -> f64 %ai = hlfir.designate %a#0 (%arg1) : (!fir.ref<!fir.array<1000xf64>>, index) -> !fir.ref<f64> hlfir.assign %iv to %ai : f64, !fir.ref<f64> >From f27462b9ad6f9f79cc7f08aa8962a537da0a18cf Mon Sep 17 00:00:00 2001 From: David Truby <david.tr...@arm.com> Date: Tue, 14 Jan 2025 16:33:00 +0000 Subject: [PATCH 4/5] Add integration test --- flang/test/Integration/unroll-loops.f90 | 37 +++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 flang/test/Integration/unroll-loops.f90 diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90 new file mode 100644 index 00000000000000..b03bac97c6eb32 --- /dev/null +++ b/flang/test/Integration/unroll-loops.f90 @@ -0,0 +1,37 @@ +! RUN: %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +! RUN: %flang_fc1 -emit-llvm -O2 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +! RUN: %flang_fc1 -emit-llvm -O1 -fno-unroll-loops -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +! RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL + +! CHECK-LABEL: @unroll +! CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]]) +subroutine unroll(a) + real(kind=8), intent(out) :: a(1000) + integer(kind=8) :: i + ! CHECK: br label %[[BLK:.*]] + ! CHECK: [[BLK]]: + ! CHECK-NEXT: %[[IND:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ] + ! CHECK-NEXT: %[[VIND:.*]] = phi <2 x i64> [ <i64 1, i64 2>, %{{.*}} ], [ %[[NVIND:.*]], %[[BLK]] ] + ! + ! NO-UNROLL-NEXT: %[[IV_D:.*]] = uitofp nneg <2 x i64> %[[VIND]] to <2 x double> + ! NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[IND]] + ! NO-UNROLL-NEXT: store <2 x double> %[[IV_D]], ptr %[[GEP]] + ! NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2 + ! NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2) + ! + ! UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2) + ! UNROLL-NEXT: %[[IV0_D:.*]] = uitofp nneg <2 x i64> %[[VIND]] to <2 x double> + ! UNROLL-NEXT: %[[IV1_D:.*]] = uitofp nneg <2 x i64> %[[VIND1]] to <2 x double> + ! UNROLL-NEXT: %[[GEP0:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[IND]] + ! UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16 + ! UNROLL-NEXT: store <2 x double> %[[IV0_D]], ptr %[[GEP0]] + ! UNROLL-NEXT: store <2 x double> %[[IV1_D]], ptr %[[GEP1]] + ! UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4 + ! UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4) + ! + ! CHECK-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000 + ! CHECK-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]] + do i=1,1000 + a(i) = i + end do +end subroutine >From 0141e0d2413cdfebf92f48c3a8ca2759c4b4687e Mon Sep 17 00:00:00 2001 From: David Truby <david.tr...@arm.com> Date: Tue, 14 Jan 2025 17:28:59 +0000 Subject: [PATCH 5/5] Switch to i64 to simplify tests --- flang/test/HLFIR/unroll-loops.fir | 22 +++++++++------------- flang/test/Integration/unroll-loops.f90 | 15 ++++++--------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/flang/test/HLFIR/unroll-loops.fir b/flang/test/HLFIR/unroll-loops.fir index e032cff548b8de..83b30d4d72693c 100644 --- a/flang/test/HLFIR/unroll-loops.fir +++ b/flang/test/HLFIR/unroll-loops.fir @@ -5,11 +5,11 @@ // CHECK-LABEL: @unroll // CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]]) -func.func @unroll(%arg0: !fir.ref<!fir.array<1000xf64>> {fir.bindc_name = "a"}) { +func.func @unroll(%arg0: !fir.ref<!fir.array<1000 x index>> {fir.bindc_name = "a"}) { %scope = fir.dummy_scope : !fir.dscope %c1000 = arith.constant 1000 : index %shape = fir.shape %c1000 : (index) -> !fir.shape<1> - %a:2 = hlfir.declare %arg0(%shape) dummy_scope %scope {uniq_name = "unrollEa"} : (!fir.ref<!fir.array<1000xf64>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<1000xf64>>, !fir.ref<!fir.array<1000xf64>>) + %a:2 = hlfir.declare %arg0(%shape) dummy_scope %scope {uniq_name = "unrollEa"} : (!fir.ref<!fir.array<1000xindex>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<1000 x index>>, !fir.ref<!fir.array<1000 x index>>) %c1 = arith.constant 1 : index fir.do_loop %arg1 = %c1 to %c1000 step %c1 { // CHECK: br label %[[BLK:.*]] @@ -17,27 +17,23 @@ func.func @unroll(%arg0: !fir.ref<!fir.array<1000xf64>> {fir.bindc_name = "a"}) // CHECK-NEXT: %[[IND:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ] // CHECK-NEXT: %[[VIND:.*]] = phi <2 x i64> [ <i64 1, i64 2>, %{{.*}} ], [ %[[NVIND:.*]], %[[BLK]] ] - // NO-UNROLL-NEXT: %[[IV_D:.*]] = uitofp nneg <2 x i64> %[[VIND]] to <2 x double> - // NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[IND]] - // NO-UNROLL-NEXT: store <2 x double> %[[IV_D]], ptr %[[GEP]] + // NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] + // NO-UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP]] // NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2 // NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2) // UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2) - // UNROLL-NEXT: %[[IV0_D:.*]] = uitofp nneg <2 x i64> %[[VIND]] to <2 x double> - // UNROLL-NEXT: %[[IV1_D:.*]] = uitofp nneg <2 x i64> %[[VIND1]] to <2 x double> - // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[IND]] + // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] // UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16 - // UNROLL-NEXT: store <2 x double> %[[IV0_D]], ptr %[[GEP0]] - // UNROLL-NEXT: store <2 x double> %[[IV1_D]], ptr %[[GEP1]] + // UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP0]] + // UNROLL-NEXT: store <2 x i64> %[[VIND1]], ptr %[[GEP1]] // UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4 // UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4) // CHECK-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000 // CHECK-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]] - %iv = fir.convert %arg1 : (index) -> f64 - %ai = hlfir.designate %a#0 (%arg1) : (!fir.ref<!fir.array<1000xf64>>, index) -> !fir.ref<f64> - hlfir.assign %iv to %ai : f64, !fir.ref<f64> + %ai = hlfir.designate %a#0 (%arg1) : (!fir.ref<!fir.array<1000 x index>>, index) -> !fir.ref<index> + hlfir.assign %arg1 to %ai : index, !fir.ref<index> } return } diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90 index b03bac97c6eb32..939c96e150690e 100644 --- a/flang/test/Integration/unroll-loops.f90 +++ b/flang/test/Integration/unroll-loops.f90 @@ -6,26 +6,23 @@ ! CHECK-LABEL: @unroll ! CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]]) subroutine unroll(a) - real(kind=8), intent(out) :: a(1000) + integer(kind=8), intent(out) :: a(1000) integer(kind=8) :: i ! CHECK: br label %[[BLK:.*]] ! CHECK: [[BLK]]: ! CHECK-NEXT: %[[IND:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ] ! CHECK-NEXT: %[[VIND:.*]] = phi <2 x i64> [ <i64 1, i64 2>, %{{.*}} ], [ %[[NVIND:.*]], %[[BLK]] ] ! - ! NO-UNROLL-NEXT: %[[IV_D:.*]] = uitofp nneg <2 x i64> %[[VIND]] to <2 x double> - ! NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[IND]] - ! NO-UNROLL-NEXT: store <2 x double> %[[IV_D]], ptr %[[GEP]] + ! NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] + ! NO-UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP]] ! NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2 ! NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2) ! ! UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2) - ! UNROLL-NEXT: %[[IV0_D:.*]] = uitofp nneg <2 x i64> %[[VIND]] to <2 x double> - ! UNROLL-NEXT: %[[IV1_D:.*]] = uitofp nneg <2 x i64> %[[VIND1]] to <2 x double> - ! UNROLL-NEXT: %[[GEP0:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[IND]] + ! UNROLL-NEXT: %[[GEP0:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]] ! UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16 - ! UNROLL-NEXT: store <2 x double> %[[IV0_D]], ptr %[[GEP0]] - ! UNROLL-NEXT: store <2 x double> %[[IV1_D]], ptr %[[GEP1]] + ! UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP0]] + ! UNROLL-NEXT: store <2 x i64> %[[VIND1]], ptr %[[GEP1]] ! UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4 ! UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4) ! _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits