[clang] [llvm] [Clang][Driver] Add an option to control loop-interchange (PR #125830)
Sjoerd Meijer via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 6 03:01:22 PST 2025
https://github.com/sjoerdmeijer updated https://github.com/llvm/llvm-project/pull/125830
>From da944d743f9fb97ddb1a40f58d43b0262f58205a Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 6 Feb 2025 03:00:24 -0800
Subject: [PATCH] [Clang][Driver] Add an option to control loop-interchange
This introduces options -floop-interchange and -fno-loop-interchange to
enable/disable the loop-interchange pass. This is part of the work that
tries to get that pass enabled by default (#124911), where it was
remarked that a user-facing option to control this would be convenient
to have. The option name is the same as GCC's.
---
clang/include/clang/Basic/CodeGenOptions.def | 1 +
clang/include/clang/Driver/Options.td | 5 +++++
clang/lib/CodeGen/BackendUtil.cpp | 2 ++
clang/lib/Driver/ToolChains/Clang.cpp | 2 ++
clang/lib/Frontend/CompilerInvocation.cpp | 7 +++++++
clang/test/Driver/clang_f_opts.c | 7 +++++++
llvm/include/llvm/Passes/PassBuilder.h | 4 ++++
llvm/lib/Passes/PassBuilderPipelines.cpp | 9 +++------
8 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 1ab8c7fb4d3c33c..22e6bf4aae6db50 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -319,6 +319,7 @@ CODEGENOPT(TimePassesPerRun , 1, 0) ///< Set when -ftime-report=per-pass-run is
CODEGENOPT(TimeTrace , 1, 0) ///< Set when -ftime-trace is enabled.
VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500) ///< Minimum time granularity (in microseconds),
///< traced by time profiler
+CODEGENOPT(InterchangeLoops , 1, 0) ///< Run loop-interchange.
CODEGENOPT(UnrollLoops , 1, 0) ///< Control whether loops are unrolled.
CODEGENOPT(RerollLoops , 1, 0) ///< Control whether loops are rerolled.
CODEGENOPT(NoUseJumpTables , 1, 0) ///< Set when -fno-jump-tables is enabled.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index df705104d9ea314..a0c13246acfedae 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4161,6 +4161,10 @@ def ftrap_function_EQ : Joined<["-"], "ftrap-function=">, Group<f_Group>,
Visibility<[ClangOption, CC1Option]>,
HelpText<"Issue call to specified function rather than a trap instruction">,
MarshallingInfoString<CodeGenOpts<"TrapFuncName">>;
+def floop_interchange : Flag<["-"], "floop-interchange">, Group<f_Group>,
+ HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>;
+def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>,
+ HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>;
def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
@@ -7498,6 +7502,7 @@ defm link_builtin_bitcode_postopt: BoolMOption<"link-builtin-bitcode-postopt",
PosFlag<SetTrue, [], [ClangOption], "Link builtin bitcodes after the "
"optimization pipeline">,
NegFlag<SetFalse, [], [ClangOption]>>;
+
def vectorize_loops : Flag<["-"], "vectorize-loops">,
HelpText<"Run the Loop vectorization passes">,
MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>;
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 3e65eeb3755d2ff..0f20fba09e44409 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -881,6 +881,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
PipelineTuningOptions PTO;
PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;
+ PTO.LoopInterchange = CodeGenOpts.InterchangeLoops;
// For historical reasons, loop interleaving is set to mirror setting for loop
// unrolling.
PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
@@ -1305,6 +1306,7 @@ runThinLTOBackend(CompilerInstance &CI, ModuleSummaryIndex *CombinedIndex,
initTargetOptions(CI, Diags, Conf.Options);
Conf.SampleProfile = std::move(SampleProfile);
Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
+ Conf.PTO.LoopInterchange = CGOpts.InterchangeLoops;
// For historical reasons, loop interleaving is set to mirror setting for loop
// unrolling.
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 33f08cf28feca18..bb7ddb93e338c04 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7041,6 +7041,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings);
Args.AddLastArg(CmdArgs, options::OPT_funroll_loops,
options::OPT_fno_unroll_loops);
+ Args.AddLastArg(CmdArgs, options::OPT_floop_interchange,
+ options::OPT_fno_loop_interchange);
Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ);
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 3bf124e4827be96..cb030473908fa6c 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1665,6 +1665,11 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts,
else if (!Opts.UnrollLoops && Opts.OptimizationLevel > 1)
GenerateArg(Consumer, OPT_fno_unroll_loops);
+ if (Opts.InterchangeLoops)
+ GenerateArg(Consumer, OPT_floop_interchange);
+ else
+ GenerateArg(Consumer, OPT_fno_loop_interchange);
+
if (!Opts.BinutilsVersion.empty())
GenerateArg(Consumer, OPT_fbinutils_version_EQ, Opts.BinutilsVersion);
@@ -1968,6 +1973,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
Opts.UnrollLoops =
Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops,
(Opts.OptimizationLevel > 1));
+ Opts.InterchangeLoops =
+ Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false);
Opts.BinutilsVersion =
std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ));
diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
index 38f25898c955682..7454ce3d30f5fc3 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -45,6 +45,13 @@
// CHECK-UNROLL-LOOPS: "-funroll-loops"
// CHECK-NO-UNROLL-LOOPS: "-fno-unroll-loops"
+// RUN: %clang -### -S -floop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-INTERCHANGE-LOOPS %s
+// RUN: %clang -### -S -fno-loop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-NO-INTERCHANGE-LOOPS %s
+// RUN: %clang -### -S -fno-loop-interchange -floop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-INTERCHANGE-LOOPS %s
+// RUN: %clang -### -S -floop-interchange -fno-loop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-NO-INTERCHANGE-LOOPS %s
+// CHECK-INTERCHANGE-LOOPS: "-floop-interchange"
+// CHECK-NO-INTERCHANGE-LOOPS: "-fno-loop-interchange"
+
// RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s
// CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate"
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index e7bc3a58f414f15..361b9b678f9be7f 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -60,6 +60,10 @@ class PipelineTuningOptions {
/// Tuning option to enable/disable loop unrolling. Its default value is true.
bool LoopUnrolling;
+ /// Tuning option to enable/disable loop interchange. Its default value is
+ /// false.
+ bool LoopInterchange;
+
/// Tuning option to forget all SCEV loops in LoopUnroll. Its default value
/// is that of the flag: `-forget-scev-loop-unroll`.
bool ForgetAllSCEVInLoopUnroll;
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 4ec0fb8fc81ea4c..74f8148ee9529ad 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -200,10 +200,6 @@ static cl::opt<bool> ExtraVectorizerPasses(
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
cl::desc("Run the NewGVN pass"));
-static cl::opt<bool> EnableLoopInterchange(
- "enable-loopinterchange", cl::init(false), cl::Hidden,
- cl::desc("Enable the experimental LoopInterchange Pass"));
-
static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
cl::init(false), cl::Hidden,
cl::desc("Enable Unroll And Jam Pass"));
@@ -316,6 +312,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
LoopVectorization = true;
SLPVectorization = false;
LoopUnrolling = true;
+ LoopInterchange = false;
ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
@@ -480,7 +477,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
LPM2.addPass(LoopDeletionPass());
- if (EnableLoopInterchange)
+ if (PTO.LoopInterchange)
LPM2.addPass(LoopInterchangePass());
// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
@@ -671,7 +668,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM2.addPass(LoopDeletionPass());
- if (EnableLoopInterchange)
+ if (PTO.LoopInterchange)
LPM2.addPass(LoopInterchangePass());
// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
More information about the cfe-commits
mailing list