[flang-commits] [flang] efae695 - Add -f[no-]loop-versioning option
Mats Petersson via flang-commits
flang-commits at lists.llvm.org
Tue Apr 18 01:48:38 PDT 2023
Author: Mats Petersson
Date: 2023-04-18T09:47:54+01:00
New Revision: efae695d52ccc987ddd3eb042eeb11f06cb2383b
URL: https://github.com/llvm/llvm-project/commit/efae695d52ccc987ddd3eb042eeb11f06cb2383b
DIFF: https://github.com/llvm/llvm-project/commit/efae695d52ccc987ddd3eb042eeb11f06cb2383b.diff
LOG: Add -f[no-]loop-versioning option
Add flags for loop-versioning pass enable/disable
Reviewed By: awarzynski, tblah
Differential Revision: https://reviews.llvm.org/D141307
Added:
flang/test/Driver/version-loops.f90
Modified:
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/Flang.cpp
clang/lib/Driver/ToolChains/Flang.h
flang/include/flang/Frontend/CodeGenOptions.def
flang/include/flang/Tools/CLOptions.inc
flang/lib/Frontend/CompilerInvocation.cpp
flang/lib/Frontend/FrontendActions.cpp
flang/test/Driver/driver-help-hidden.f90
flang/test/Driver/driver-help.f90
flang/test/Driver/frontend-forwarding.f90
Removed:
################################################################################
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 7fa1d7d636bb..eed0d517a1ad 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5167,7 +5167,9 @@ def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>,
defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays",
PosFlag<SetTrue, [], "Attempt to allocate array temporaries on the stack, no matter their size">,
NegFlag<SetFalse, [], "Allocate array temporaries on the heap (default)">>;
-
+defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride",
+ PosFlag<SetTrue, [], "Create unit-strided versions of loops">,
+ NegFlag<SetFalse, [], "Do not create unit-strided loops (default)">>;
} // let Flags = [FC1Option, FlangOption, FlangOnlyOption]
def J : JoinedOrSeparate<["-"], "J">,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 57555ed32553..300072db35ed 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -52,6 +52,55 @@ void Flang::addPreprocessingOptions(const ArgList &Args,
options::OPT_I, options::OPT_cpp, options::OPT_nocpp});
}
+/// @C shouldLoopVersion
+///
+/// Check if Loop Versioning should be enabled.
+/// We look for the last of one of the following:
+/// -Ofast, -O4, -O<number> and -f[no-]version-loops-for-stride.
+/// Loop versioning is disabled if the last option is
+/// -fno-version-loops-for-stride.
+/// Loop versioning is enabled if the last option is one of:
+/// -fversion-loops-for-stride
+/// -Ofast
+/// -O4
+/// -O<number> where <number> is greater than 2 (e.g. -O3)
+/// For all other cases, loop versioning is disabled.
+///
+/// The gfortran compiler automatically enables the option for -O3 or -Ofast.
+///
+/// @return true if loop-versioning should be enabled, otherwise false.
+static bool shouldLoopVersion(const ArgList &Args) {
+ const Arg *LoopVersioningArg = Args.getLastArg(
+ options::OPT_Ofast, options::OPT_O, options::OPT_O4,
+ options::OPT_floop_versioning, options::OPT_fno_loop_versioning);
+ if (!LoopVersioningArg)
+ return false;
+
+ if (LoopVersioningArg->getOption().matches(options::OPT_fno_loop_versioning))
+ return false;
+
+ if (LoopVersioningArg->getOption().matches(options::OPT_floop_versioning))
+ return true;
+
+ if (LoopVersioningArg->getOption().matches(options::OPT_Ofast) ||
+ LoopVersioningArg->getOption().matches(options::OPT_O4))
+ return true;
+
+ if (LoopVersioningArg->getOption().matches(options::OPT_O)) {
+ StringRef S(LoopVersioningArg->getValue());
+ unsigned OptLevel = 0;
+ // Note: -Os or -Oz would "fail" to parse as an integer here, so we return
+ // false, which is the desired behavior.
+ if (S.getAsInteger(10, OptLevel))
+ return false;
+
+ return OptLevel > 2;
+ }
+
+ llvm_unreachable("We should not end up here");
+ return false;
+}
+
void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
Args.AddAllArgs(CmdArgs,
{options::OPT_module_dir, options::OPT_fdebug_module_writer,
@@ -60,16 +109,6 @@ void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
options::OPT_fconvert_EQ, options::OPT_fpass_plugin_EQ,
options::OPT_funderscoring, options::OPT_fno_underscoring});
- Arg *stackArrays =
- Args.getLastArg(options::OPT_Ofast, options::OPT_fstack_arrays,
- options::OPT_fno_stack_arrays);
- if (stackArrays &&
- !stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
- CmdArgs.push_back("-fstack-arrays");
-
- if (Args.hasArg(options::OPT_flang_experimental_hlfir))
- CmdArgs.push_back("-flang-experimental-hlfir");
-
llvm::codegenoptions::DebugInfoKind DebugInfoKind;
if (Args.hasArg(options::OPT_gN_Group)) {
Arg *gNArg = Args.getLastArg(options::OPT_gN_Group);
@@ -82,6 +121,21 @@ void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
addDebugInfoKind(CmdArgs, DebugInfoKind);
}
+void Flang::addCodegenOptions(const ArgList &Args,
+ ArgStringList &CmdArgs) const {
+ Arg *stackArrays = // last of -Ofast / -fstack-arrays / -fno-stack-arrays
+ Args.getLastArg(options::OPT_Ofast, options::OPT_fstack_arrays,
+ options::OPT_fno_stack_arrays);
+ if (stackArrays && // forward unless the last such option is the negative form
+ !stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
+ CmdArgs.push_back("-fstack-arrays");
+
+ if (Args.hasArg(options::OPT_flang_experimental_hlfir))
+ CmdArgs.push_back("-flang-experimental-hlfir");
+ if (shouldLoopVersion(Args)) // explicit flag or optimisation level decides
+ CmdArgs.push_back("-fversion-loops-for-stride");
+}
+
void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
// ParsePICArgs parses -fPIC/-fPIE and their variants and returns a tuple of
// (RelocationModel, PICLevel, IsPIE).
@@ -391,6 +445,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
// Add target args, features, etc.
addTargetOptions(Args, CmdArgs);
+ // Add Codegen options
+ addCodegenOptions(Args, CmdArgs);
+
// Add other compile options
addOtherOptions(Args, CmdArgs);
diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h
index 0dc3cb7eeadc..962b4ae60172 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -67,6 +67,14 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
const JobAction &JA, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
+ /// Extract options for code generation from the driver arguments and add them
+ /// to the command arguments.
+ ///
+ /// \param [in] Args The list of input driver arguments
+ /// \param [out] CmdArgs The list of output command arguments
+ void addCodegenOptions(const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs) const;
+
/// Extract other compilation options from the driver arguments and add them
/// to the command arguments.
///
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index dbdfe073d2de..c3a04108aa08 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the
CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
///< compile step.
CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
+CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
CODEGENOPT(Underscoring, 1, 1)
ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use.
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 2c5efeceda62..cb19d5ddfd9b 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -186,7 +186,7 @@ inline void addExternalNameConversionPass(
/// \param pm - MLIR pass manager that will hold the pipeline definition
inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
llvm::OptimizationLevel optLevel = defaultOptLevel,
- bool stackArrays = false) {
+ bool stackArrays = false, bool loopVersioning = false) {
// simplify the IR
mlir::GreedyRewriteConfig config;
config.enableRegionSimplification = false;
@@ -283,11 +283,13 @@ inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm,
llvm::OptimizationLevel optLevel = defaultOptLevel,
bool stackArrays = false, bool underscoring = true,
+ bool loopVersioning = false,
llvm::codegenoptions::DebugInfoKind debugInfo = NoDebugInfo) {
fir::createHLFIRToFIRPassPipeline(pm, optLevel);
// Add default optimizer pass pipeline.
- fir::createDefaultFIROptimizerPassPipeline(pm, optLevel, stackArrays);
+ fir::createDefaultFIROptimizerPassPipeline(
+ pm, optLevel, stackArrays, loopVersioning);
// Add codegen pass pipeline.
fir::createDefaultFIRCodeGenPassPipeline(
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index e05ce4343f4c..c6d5152c38d1 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -163,6 +163,10 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
clang::driver::options::OPT_fno_stack_arrays, false)) {
opts.StackArrays = 1;
}
+ if (args.hasFlag(clang::driver::options::OPT_floop_versioning,
+ clang::driver::options::OPT_fno_loop_versioning, false)) {
+ opts.LoopVersioning = 1;
+ }
for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ))
opts.LLVMPassPlugins.push_back(a->getValue());
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index af6d52da9c4d..f3e643ef99a1 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -665,7 +665,8 @@ void CodeGenAction::generateLLVMIR() {
// Create the pass pipeline
fir::createMLIRToLLVMPassPipeline(pm, level, opts.StackArrays,
- opts.Underscoring, opts.getDebugInfo());
+ opts.Underscoring, opts.LoopVersioning,
+ opts.getDebugInfo());
(void)mlir::applyPassManagerCLOptions(pm);
// run the pass manager
@@ -704,7 +705,6 @@ void CodeGenAction::generateLLVMIR() {
llvmModule->setPIELevel(
static_cast<llvm::PIELevel::Level>(opts.PICLevel));
}
-
}
bool CodeGenAction::setUpTargetMachine() {
diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90
index c365fbf16de3..68b106cbccbf 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -52,6 +52,8 @@
! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler
! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! CHECK-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
+! CHECK-NEXT: -fno-version-loops-for-stride
+! CHECK-NEXT: Do not create unit-strided loops (default)
! CHECK-NEXT: -fopenacc Enable OpenACC
! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
! CHECK-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
@@ -59,6 +61,8 @@
! CHECK-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
! CHECK-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
! CHECK-NEXT: -funderscoring Appends one trailing underscore to external names
+! CHECK-NEXT: -fversion-loops-for-stride
+! CHECK-NEXT: Create unit-strided versions of loops
! CHECK-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV.
! CHECK-NEXT: -gline-tables-only Emit debug line number tables only
! CHECK-NEXT: -g Generate source-level debug information
diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90
index 25871db04891..f57fd32b0838 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -48,6 +48,8 @@
! HELP-NEXT: -fno-integrated-as Disable the integrated assembler
! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! HELP-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
+! HELP-NEXT: -fno-version-loops-for-stride
+! HELP-NEXT: Do not create unit-strided loops (default)
! HELP-NEXT: -fopenacc Enable OpenACC
! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
! HELP-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
@@ -55,6 +57,8 @@
! HELP-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
! HELP-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
! HELP-NEXT: -funderscoring Appends one trailing underscore to external names
+! HELP-NEXT: -fversion-loops-for-stride
+! HELP-NEXT: Create unit-strided versions of loops
! HELP-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV.
! HELP-NEXT: -gline-tables-only Emit debug line number tables only
! HELP-NEXT: -g Generate source-level debug information
@@ -146,6 +150,8 @@
! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode
! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
+! HELP-FC1-NEXT: -fno-version-loops-for-stride
+! HELP-FC1-NEXT: Do not create unit-strided loops (default)
! HELP-FC1-NEXT: -fopenacc Enable OpenACC
! HELP-FC1-NEXT: -fopenmp-is-device Generate code only for an OpenMP target device.
! HELP-FC1-NEXT: -fopenmp-target-debug Enable debugging in the OpenMP offloading device RTL
@@ -155,6 +161,8 @@
! HELP-FC1-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
! HELP-FC1-NEXT: -funderscoring Appends one trailing underscore to external names
+! HELP-FC1-NEXT: -fversion-loops-for-stride
+! HELP-FC1-NEXT: Create unit-strided versions of loops
! HELP-FC1-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV.
! HELP-FC1-NEXT: -help Display available options
! HELP-FC1-NEXT: -init-only Only execute frontend initialization
diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90
index 7bc05420c537..ddee84bac106 100644
--- a/flang/test/Driver/frontend-forwarding.f90
+++ b/flang/test/Driver/frontend-forwarding.f90
@@ -15,6 +15,8 @@
! RUN: -fassociative-math \
! RUN: -freciprocal-math \
! RUN: -fpass-plugin=Bye%pluginext \
+! RUN: -fversion-loops-for-stride \
+! RUN: -mllvm -print-before-all\
! RUN: -mllvm -print-before-all \
! RUN: -save-temps=obj \
! RUN: -P \
@@ -34,5 +36,6 @@
! CHECK: "-freciprocal-math"
! CHECK: "-fconvert=little-endian"
! CHECK: "-fpass-plugin=Bye
+! CHECK: "-fversion-loops-for-stride"
! CHECK: "-mllvm" "-print-before-all"
! CHECK: "-save-temps=obj"
diff --git a/flang/test/Driver/version-loops.f90 b/flang/test/Driver/version-loops.f90
new file mode 100644
index 000000000000..b0fa01d57251
--- /dev/null
+++ b/flang/test/Driver/version-loops.f90
@@ -0,0 +1,54 @@
+! Test that flang-new forwards the -f{no-,}version-loops-for-stride
+! options correctly to flang-new -fc1 for different variants of optimisation
+! and explicit flags.
+
+! RUN: %flang -### %s -o %t 2>&1 -O3 \
+! RUN: | FileCheck %s
+
+! RUN: %flang -### %s -o %t 2>&1 -O2 \
+! RUN: | FileCheck %s --check-prefix=CHECK-O2
+
+! RUN: %flang -### %s -o %t 2>&1 -O2 -fversion-loops-for-stride \
+! RUN: | FileCheck %s --check-prefix=CHECK-O2-with
+
+! RUN: %flang -### %s -o %t 2>&1 -O4 \
+! RUN: | FileCheck %s --check-prefix=CHECK-O4
+
+! RUN: %flang -### %s -o %t 2>&1 -Ofast \
+! RUN: | FileCheck %s --check-prefix=CHECK-Ofast
+
+! RUN: %flang -### %s -o %t 2>&1 -Ofast -fno-version-loops-for-stride \
+! RUN: | FileCheck %s --check-prefix=CHECK-Ofast-no
+
+! RUN: %flang -### %s -o %t 2>&1 -O3 -fno-version-loops-for-stride \
+! RUN: | FileCheck %s --check-prefix=CHECK-O3-no
+
+! CHECK: "{{.*}}flang-new" "-fc1"
+! CHECK-SAME: "-fversion-loops-for-stride"
+! CHECK-SAME: "-O3"
+
+! CHECK-O2: "{{.*}}flang-new" "-fc1"
+! CHECK-O2-NOT: "-fversion-loops-for-stride"
+! CHECK-O2-SAME: "-O2"
+
+! CHECK-O2-with: "{{.*}}flang-new" "-fc1"
+! CHECK-O2-with-SAME: "-fversion-loops-for-stride"
+! CHECK-O2-with-SAME: "-O2"
+
+! CHECK-O4: "{{.*}}flang-new" "-fc1"
+! CHECK-O4-SAME: "-fversion-loops-for-stride"
+! CHECK-O4-SAME: "-O3"
+
+! CHECK-Ofast: "{{.*}}flang-new" "-fc1"
+! CHECK-Ofast-SAME: "-ffast-math"
+! CHECK-Ofast-SAME: "-fversion-loops-for-stride"
+! CHECK-Ofast-SAME: "-O3"
+
+! CHECK-Ofast-no: "{{.*}}flang-new" "-fc1"
+! CHECK-Ofast-no-SAME: "-ffast-math"
+! CHECK-Ofast-no-NOT: "-fversion-loops-for-stride"
+! CHECK-Ofast-no-SAME: "-O3"
+
+! CHECK-O3-no: "{{.*}}flang-new" "-fc1"
+! CHECK-O3-no-NOT: "-fversion-loops-for-stride"
+! CHECK-O3-no-SAME: "-O3"
More information about the flang-commits
mailing list