[flang-commits] [clang] [flang] [flang] Add -f[no-]unroll-loops flag (PR #122906)
David Truby via flang-commits
flang-commits at lists.llvm.org
Tue Jan 14 08:09:40 PST 2025
https://github.com/DavidTruby updated https://github.com/llvm/llvm-project/pull/122906
>From c9b2e5855fdbbaafb5512e1e2539983201202b25 Mon Sep 17 00:00:00 2001
From: David Truby <david.truby at arm.com>
Date: Wed, 8 Jan 2025 11:19:38 +0000
Subject: [PATCH 1/3] [flang] Add -f[no-]unroll-loops flag
This patch adds support for the -funroll-loops and -fno-unroll-loops
flags, with behaviour similar to clang's. As in clang, loop unrolling is
enabled by default at -O2 and above unless overridden by one of these flags.
---
clang/include/clang/Driver/Options.td | 4 +-
clang/lib/Driver/ToolChains/Flang.cpp | 7 ++-
.../include/flang/Frontend/CodeGenOptions.def | 1 +
flang/lib/Frontend/CompilerInvocation.cpp | 4 ++
flang/lib/Frontend/FrontendActions.cpp | 2 +
flang/test/HLFIR/unroll-loops.fir | 43 +++++++++++++++++++
6 files changed, 58 insertions(+), 3 deletions(-)
create mode 100644 flang/test/HLFIR/unroll-loops.fir
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2721c1b5d8dc55..4bab2ae4d8dd5c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4157,9 +4157,9 @@ def ftrap_function_EQ : Joined<["-"], "ftrap-function=">, Group<f_Group>,
HelpText<"Issue call to specified function rather than a trap instruction">,
MarshallingInfoString<CodeGenOpts<"TrapFuncName">>;
def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
- HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option]>;
+ HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
- HelpText<"Turn off loop unroller">, Visibility<[ClangOption, CC1Option]>;
+ HelpText<"Turn off loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def ffinite_loops: Flag<["-"], "ffinite-loops">, Group<f_Group>,
HelpText<"Assume all non-trivial loops are finite.">, Visibility<[ClangOption, CC1Option]>;
def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group<f_Group>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index a7d0cc99f27d2d..282a4e267b3dfc 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -150,12 +150,17 @@ void Flang::addCodegenOptions(const ArgList &Args,
if (shouldLoopVersion(Args))
CmdArgs.push_back("-fversion-loops-for-stride");
+ Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir,
+ options::OPT_flang_deprecated_no_hlfir,
+ options::OPT_fno_ppc_native_vec_elem_order,
+ options::OPT_fppc_native_vec_elem_order});
Args.addAllArgs(CmdArgs,
{options::OPT_flang_experimental_hlfir,
options::OPT_flang_deprecated_no_hlfir,
options::OPT_fno_ppc_native_vec_elem_order,
options::OPT_fppc_native_vec_elem_order,
- options::OPT_ftime_report, options::OPT_ftime_report_EQ});
+ options::OPT_ftime_report, options::OPT_ftime_report_EQ,
+ options::OPT_funroll_loops, options::OPT_fno_unroll_loops});
}
void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index 9d03ec88a56b8a..deb8d1aede518b 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -32,6 +32,7 @@ CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
///< compile step.
CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
+CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
CODEGENOPT(Underscoring, 1, 1)
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 5e7127313c1335..15b1e1e0a24881 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -246,6 +246,10 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
clang::driver::options::OPT_fno_loop_versioning, false))
opts.LoopVersioning = 1;
+ opts.UnrollLoops = args.hasFlag(clang::driver::options::OPT_funroll_loops,
+ clang::driver::options::OPT_fno_unroll_loops,
+ (opts.OptimizationLevel > 1));
+
opts.AliasAnalysis = opts.OptimizationLevel > 0;
// -mframe-pointer=none/non-leaf/all option.
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 52a18d59c7cda5..b0545a7ac2f99a 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -1028,6 +1028,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
si.registerCallbacks(pic, &mam);
if (ci.isTimingEnabled())
si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
+ pto.LoopUnrolling = opts.UnrollLoops;
+ pto.LoopInterleaving = opts.UnrollLoops;
llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic);
// Attempt to load pass plugins and register their callbacks with PB.
diff --git a/flang/test/HLFIR/unroll-loops.fir b/flang/test/HLFIR/unroll-loops.fir
new file mode 100644
index 00000000000000..f645132262f8d6
--- /dev/null
+++ b/flang/test/HLFIR/unroll-loops.fir
@@ -0,0 +1,43 @@
+// RUN: %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
+// RUN: %flang_fc1 -emit-llvm -O2 -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
+// RUN: %flang_fc1 -emit-llvm -O1 -fno-unroll-loops -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
+// RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
+
+// CHECK-LABEL: @unroll
+// CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]])
+func.func @unroll(%arg0: !fir.ref<!fir.array<1000xf64>> {fir.bindc_name = "a"}) {
+ // CHECK: %[[GEPIV:.*]] = getelementptr i8, ptr %0, i64 -8
+ %scope = fir.dummy_scope : !fir.dscope
+ %c1000 = arith.constant 1000 : index
+ %shape = fir.shape %c1000 : (index) -> !fir.shape<1>
+ %a:2 = hlfir.declare %arg0(%shape) dummy_scope %scope {uniq_name = "unrollEa"} : (!fir.ref<!fir.array<1000xf64>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<1000xf64>>, !fir.ref<!fir.array<1000xf64>>)
+ %c1 = arith.constant 1 : index
+ fir.do_loop %arg1 = %c1 to %c1000 step %c1 {
+ // CHECK: [[BLK:.*]]:
+
+ // NO-UNROLL-NEXT: %[[PHI:.*]] = phi i64 [ 1, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ]
+ // NO-UNROLL-NEXT: %[[IV_D:.*]] = uitofp nneg i64 %[[PHI]] to double
+ // NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr double, ptr %[[GEPIV]], i64 %[[PHI]]
+ // NO-UNROLL-NEXT: store double %[[IV_D]], ptr %[[GEP]]
+ // NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw nsw i64 %{{.*}}, 1
+ // NO-UNROLL-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1001
+ // NO-UNROLL-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]]
+
+ // UNROLL-NEXT: %[[PHI:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ]
+ // UNROLL-NEXT: %[[IV0:.*]] = or disjoint i64 %[[PHI]], 1
+ // UNROLL-NEXT: %[[IV1:.*]] = add i64 %[[PHI]], 2
+ // UNROLL-NEXT: %[[IV0_D:.*]] = uitofp nneg i64 %[[IV0]] to double
+ // UNROLL-NEXT: %[[IV1_D:.*]] = uitofp nneg i64 %[[IV1]] to double
+ // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[PHI]]
+ // UNROLL-NEXT: %[[GEP1:.*]] = getelementptr double, ptr %[[GEPIV]], i64 %[[IV1]]
+ // UNROLL-NEXT: store double %[[IV0_D]], ptr %[[GEP0]]
+ // UNROLL-NEXT: store double %[[IV1_D]], ptr %[[GEP1]]
+ // UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[PHI]], 2
+ // UNROLL-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000
+ // UNROLL-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]]
+ %iv = fir.convert %arg1 : (index) -> f64
+ %ai = hlfir.designate %a#0 (%arg1) : (!fir.ref<!fir.array<1000xf64>>, index) -> !fir.ref<f64>
+ hlfir.assign %iv to %ai : f64, !fir.ref<f64>
+ }
+ return
+}
>From d66a7c612beed3e4f2809d8beba5648cdeea7709 Mon Sep 17 00:00:00 2001
From: David Truby <david.truby at arm.com>
Date: Tue, 14 Jan 2025 15:26:42 +0000
Subject: [PATCH 2/3] Fix bad rebase and add compiler->frontend forwarding test
---
clang/lib/Driver/ToolChains/Flang.cpp | 4 ----
flang/test/Driver/funroll-loops.f90 | 5 +++++
2 files changed, 5 insertions(+), 4 deletions(-)
create mode 100644 flang/test/Driver/funroll-loops.f90
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 282a4e267b3dfc..86ed25badfa2b7 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -150,10 +150,6 @@ void Flang::addCodegenOptions(const ArgList &Args,
if (shouldLoopVersion(Args))
CmdArgs.push_back("-fversion-loops-for-stride");
- Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir,
- options::OPT_flang_deprecated_no_hlfir,
- options::OPT_fno_ppc_native_vec_elem_order,
- options::OPT_fppc_native_vec_elem_order});
Args.addAllArgs(CmdArgs,
{options::OPT_flang_experimental_hlfir,
options::OPT_flang_deprecated_no_hlfir,
diff --git a/flang/test/Driver/funroll-loops.f90 b/flang/test/Driver/funroll-loops.f90
new file mode 100644
index 00000000000000..5c1a07e7d5d12e
--- /dev/null
+++ b/flang/test/Driver/funroll-loops.f90
@@ -0,0 +1,5 @@
+! RUN: %flang -### -funroll-loops %s 2>&1 | FileCheck %s -check-prefix UNROLL
+! RUN: %flang -### -fno-unroll-loops %s 2>&1 | FileCheck %s -check-prefix NO-UNROLL
+
+! UNROLL: "-funroll-loops"
+! NO-UNROLL: "-fno-unroll-loops"
>From ac03aad135cb8ad3ee2e40e2574284e0d17802b7 Mon Sep 17 00:00:00 2001
From: David Truby <david.truby at arm.com>
Date: Tue, 14 Jan 2025 16:08:45 +0000
Subject: [PATCH 3/3] Fix test on x86
---
flang/test/HLFIR/unroll-loops.fir | 50 +++++++++++++++----------------
1 file changed, 25 insertions(+), 25 deletions(-)
diff --git a/flang/test/HLFIR/unroll-loops.fir b/flang/test/HLFIR/unroll-loops.fir
index f645132262f8d6..e032cff548b8de 100644
--- a/flang/test/HLFIR/unroll-loops.fir
+++ b/flang/test/HLFIR/unroll-loops.fir
@@ -1,40 +1,40 @@
-// RUN: %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
-// RUN: %flang_fc1 -emit-llvm -O2 -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
-// RUN: %flang_fc1 -emit-llvm -O1 -fno-unroll-loops -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
-// RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=1 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
+// RUN: %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
+// RUN: %flang_fc1 -emit-llvm -O2 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
+// RUN: %flang_fc1 -emit-llvm -O1 -fno-unroll-loops -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
+// RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
// CHECK-LABEL: @unroll
// CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]])
func.func @unroll(%arg0: !fir.ref<!fir.array<1000xf64>> {fir.bindc_name = "a"}) {
- // CHECK: %[[GEPIV:.*]] = getelementptr i8, ptr %0, i64 -8
%scope = fir.dummy_scope : !fir.dscope
%c1000 = arith.constant 1000 : index
%shape = fir.shape %c1000 : (index) -> !fir.shape<1>
%a:2 = hlfir.declare %arg0(%shape) dummy_scope %scope {uniq_name = "unrollEa"} : (!fir.ref<!fir.array<1000xf64>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<1000xf64>>, !fir.ref<!fir.array<1000xf64>>)
%c1 = arith.constant 1 : index
fir.do_loop %arg1 = %c1 to %c1000 step %c1 {
- // CHECK: [[BLK:.*]]:
+ // CHECK: br label %[[BLK:.*]]
+ // CHECK: [[BLK]]:
+ // CHECK-NEXT: %[[IND:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ]
+ // CHECK-NEXT: %[[VIND:.*]] = phi <2 x i64> [ <i64 1, i64 2>, %{{.*}} ], [ %[[NVIND:.*]], %[[BLK]] ]
- // NO-UNROLL-NEXT: %[[PHI:.*]] = phi i64 [ 1, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ]
- // NO-UNROLL-NEXT: %[[IV_D:.*]] = uitofp nneg i64 %[[PHI]] to double
- // NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr double, ptr %[[GEPIV]], i64 %[[PHI]]
- // NO-UNROLL-NEXT: store double %[[IV_D]], ptr %[[GEP]]
- // NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw nsw i64 %{{.*}}, 1
- // NO-UNROLL-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1001
- // NO-UNROLL-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]]
+ // NO-UNROLL-NEXT: %[[IV_D:.*]] = uitofp nneg <2 x i64> %[[VIND]] to <2 x double>
+ // NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[IND]]
+ // NO-UNROLL-NEXT: store <2 x double> %[[IV_D]], ptr %[[GEP]]
+ // NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2
+ // NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2)
- // UNROLL-NEXT: %[[PHI:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ]
- // UNROLL-NEXT: %[[IV0:.*]] = or disjoint i64 %[[PHI]], 1
- // UNROLL-NEXT: %[[IV1:.*]] = add i64 %[[PHI]], 2
- // UNROLL-NEXT: %[[IV0_D:.*]] = uitofp nneg i64 %[[IV0]] to double
- // UNROLL-NEXT: %[[IV1_D:.*]] = uitofp nneg i64 %[[IV1]] to double
- // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[PHI]]
- // UNROLL-NEXT: %[[GEP1:.*]] = getelementptr double, ptr %[[GEPIV]], i64 %[[IV1]]
- // UNROLL-NEXT: store double %[[IV0_D]], ptr %[[GEP0]]
- // UNROLL-NEXT: store double %[[IV1_D]], ptr %[[GEP1]]
- // UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[PHI]], 2
- // UNROLL-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000
- // UNROLL-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]]
+ // UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2)
+ // UNROLL-NEXT: %[[IV0_D:.*]] = uitofp nneg <2 x i64> %[[VIND]] to <2 x double>
+ // UNROLL-NEXT: %[[IV1_D:.*]] = uitofp nneg <2 x i64> %[[VIND1]] to <2 x double>
+ // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr double, ptr %[[ARG0]], i64 %[[IND]]
+ // UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16
+ // UNROLL-NEXT: store <2 x double> %[[IV0_D]], ptr %[[GEP0]]
+ // UNROLL-NEXT: store <2 x double> %[[IV1_D]], ptr %[[GEP1]]
+ // UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4
+ // UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4)
+
+ // CHECK-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000
+ // CHECK-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]]
%iv = fir.convert %arg1 : (index) -> f64
%ai = hlfir.designate %a#0 (%arg1) : (!fir.ref<!fir.array<1000xf64>>, index) -> !fir.ref<f64>
hlfir.assign %iv to %ai : f64, !fir.ref<f64>
More information about the flang-commits
mailing list