[clang] efae695 - Add -f[no-]loop-versioning option

Mats Petersson via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 18 01:48:39 PDT 2023


Author: Mats Petersson
Date: 2023-04-18T09:47:54+01:00
New Revision: efae695d52ccc987ddd3eb042eeb11f06cb2383b

URL: https://github.com/llvm/llvm-project/commit/efae695d52ccc987ddd3eb042eeb11f06cb2383b
DIFF: https://github.com/llvm/llvm-project/commit/efae695d52ccc987ddd3eb042eeb11f06cb2383b.diff

LOG: Add -f[no-]loop-versioning option

Add flags for loop-versioning pass enable/disable

Reviewed By: awarzynski, tblah

Differential Revision: https://reviews.llvm.org/D141307

Added: 
    flang/test/Driver/version-loops.f90

Modified: 
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/ToolChains/Flang.cpp
    clang/lib/Driver/ToolChains/Flang.h
    flang/include/flang/Frontend/CodeGenOptions.def
    flang/include/flang/Tools/CLOptions.inc
    flang/lib/Frontend/CompilerInvocation.cpp
    flang/lib/Frontend/FrontendActions.cpp
    flang/test/Driver/driver-help-hidden.f90
    flang/test/Driver/driver-help.f90
    flang/test/Driver/frontend-forwarding.f90

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 7fa1d7d636bb..eed0d517a1ad 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5167,7 +5167,9 @@ def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>,
 defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays",
   PosFlag<SetTrue, [], "Attempt to allocate array temporaries on the stack, no matter their size">,
   NegFlag<SetFalse, [], "Allocate array temporaries on the heap (default)">>;
-
+defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride",
+  PosFlag<SetTrue, [], "Create unit-strided versions of loops">,
+   NegFlag<SetFalse, [], "Do not create unit-strided loops (default)">>;
 } // let Flags = [FC1Option, FlangOption, FlangOnlyOption]
 
 def J : JoinedOrSeparate<["-"], "J">,

diff  --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 57555ed32553..300072db35ed 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -52,6 +52,55 @@ void Flang::addPreprocessingOptions(const ArgList &Args,
                    options::OPT_I, options::OPT_cpp, options::OPT_nocpp});
 }
 
+/// @C shouldLoopVersion
+///
+/// Check if Loop Versioning should be enabled.
+/// We look for the last of one of the following:
+///   -Ofast, -O4, -O<number> and -f[no-]version-loops-for-stride.
+/// Loop versioning is disabled if the last option is
+///  -fno-version-loops-for-stride.
+/// Loop versioning is enabled if the last option is one of:
+///  -fversion-loops-for-stride
+///  -Ofast
+///  -O4
+///  -O<number> with a number greater than 2 (for example -O3)
+/// For all other cases, loop versioning is disabled.
+///
+/// The gfortran compiler automatically enables the option for -O3 or -Ofast.
+///
+/// @return true if loop-versioning should be enabled, otherwise false.
+static bool shouldLoopVersion(const ArgList &Args) {
+  const Arg *LoopVersioningArg = Args.getLastArg(
+      options::OPT_Ofast, options::OPT_O, options::OPT_O4,
+      options::OPT_floop_versioning, options::OPT_fno_loop_versioning);
+  if (!LoopVersioningArg)
+    return false;
+
+  if (LoopVersioningArg->getOption().matches(options::OPT_fno_loop_versioning))
+    return false;
+
+  if (LoopVersioningArg->getOption().matches(options::OPT_floop_versioning))
+    return true;
+
+  if (LoopVersioningArg->getOption().matches(options::OPT_Ofast) ||
+      LoopVersioningArg->getOption().matches(options::OPT_O4))
+    return true;
+
+  if (LoopVersioningArg->getOption().matches(options::OPT_O)) {
+    StringRef S(LoopVersioningArg->getValue());
+    unsigned OptLevel = 0;
+    // Note: -Os or -Oz would "fail" the integer parse here, so we return
+    // false, which is the desired behavior for those levels.
+    if (S.getAsInteger(10, OptLevel))
+      return false;
+
+    return OptLevel > 2;
+  }
+
+  llvm_unreachable("We should not end up here");
+  return false;
+}
+
 void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
   Args.AddAllArgs(CmdArgs,
                   {options::OPT_module_dir, options::OPT_fdebug_module_writer,
@@ -60,16 +109,6 @@ void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
                    options::OPT_fconvert_EQ, options::OPT_fpass_plugin_EQ,
                    options::OPT_funderscoring, options::OPT_fno_underscoring});
 
-  Arg *stackArrays =
-      Args.getLastArg(options::OPT_Ofast, options::OPT_fstack_arrays,
-                      options::OPT_fno_stack_arrays);
-  if (stackArrays &&
-      !stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
-    CmdArgs.push_back("-fstack-arrays");
-
-  if (Args.hasArg(options::OPT_flang_experimental_hlfir))
-    CmdArgs.push_back("-flang-experimental-hlfir");
-
   llvm::codegenoptions::DebugInfoKind DebugInfoKind;
   if (Args.hasArg(options::OPT_gN_Group)) {
     Arg *gNArg = Args.getLastArg(options::OPT_gN_Group);
@@ -82,6 +121,21 @@ void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
   addDebugInfoKind(CmdArgs, DebugInfoKind);
 }
 
+void Flang::addCodegenOptions(const ArgList &Args,
+                              ArgStringList &CmdArgs) const {
+  Arg *stackArrays = // last of -Ofast / -f[no-]stack-arrays, if any
+      Args.getLastArg(options::OPT_Ofast, options::OPT_fstack_arrays,
+                      options::OPT_fno_stack_arrays);
+  if (stackArrays && // forward unless -fno-stack-arrays was given last
+      !stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
+    CmdArgs.push_back("-fstack-arrays");
+
+  if (Args.hasArg(options::OPT_flang_experimental_hlfir))
+    CmdArgs.push_back("-flang-experimental-hlfir");
+  if (shouldLoopVersion(Args)) // decided from -O level and explicit flags
+    CmdArgs.push_back("-fversion-loops-for-stride");
+}
+
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
   // ParsePICArgs parses -fPIC/-fPIE and their variants and returns a tuple of
   // (RelocationModel, PICLevel, IsPIE).
@@ -391,6 +445,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
   // Add target args, features, etc.
   addTargetOptions(Args, CmdArgs);
 
+  // Add Codegen options
+  addCodegenOptions(Args, CmdArgs);
+
   // Add other compile options
   addOtherOptions(Args, CmdArgs);
 

diff  --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h
index 0dc3cb7eeadc..962b4ae60172 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -67,6 +67,14 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
                          const JobAction &JA, const llvm::opt::ArgList &Args,
                          llvm::opt::ArgStringList &CmdArgs) const;
 
+  /// Extract options for code generation from the driver arguments and add them
+  /// to the command arguments.
+  ///
+  /// \param [in] Args The list of input driver arguments
+  /// \param [out] CmdArgs The list of output command arguments
+  void addCodegenOptions(const llvm::opt::ArgList &Args,
+                         llvm::opt::ArgStringList &CmdArgs) const;
+
   /// Extract other compilation options from the driver arguments and add them
   /// to the command arguments.
   ///

diff  --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index dbdfe073d2de..c3a04108aa08 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the
 CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
                                      ///< compile step.
 CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
+CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
 
 CODEGENOPT(Underscoring, 1, 1)
 ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use.

diff  --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 2c5efeceda62..cb19d5ddfd9b 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -186,7 +186,7 @@ inline void addExternalNameConversionPass(
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
     llvm::OptimizationLevel optLevel = defaultOptLevel,
-    bool stackArrays = false) {
+    bool stackArrays = false, bool loopVersioning = false) {
   // simplify the IR
   mlir::GreedyRewriteConfig config;
   config.enableRegionSimplification = false;
@@ -283,11 +283,13 @@ inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
 inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm,
     llvm::OptimizationLevel optLevel = defaultOptLevel,
     bool stackArrays = false, bool underscoring = true,
+    bool loopVersioning = false,
     llvm::codegenoptions::DebugInfoKind debugInfo = NoDebugInfo) {
   fir::createHLFIRToFIRPassPipeline(pm, optLevel);
 
   // Add default optimizer pass pipeline.
-  fir::createDefaultFIROptimizerPassPipeline(pm, optLevel, stackArrays);
+  fir::createDefaultFIROptimizerPassPipeline(
+      pm, optLevel, stackArrays, loopVersioning);
 
   // Add codegen pass pipeline.
   fir::createDefaultFIRCodeGenPassPipeline(

diff  --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index e05ce4343f4c..c6d5152c38d1 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -163,6 +163,10 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
                    clang::driver::options::OPT_fno_stack_arrays, false)) {
     opts.StackArrays = 1;
   }
+  if (args.hasFlag(clang::driver::options::OPT_floop_versioning,
+                   clang::driver::options::OPT_fno_loop_versioning, false)) {
+    opts.LoopVersioning = 1;
+  }
 
   for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ))
     opts.LLVMPassPlugins.push_back(a->getValue());

diff  --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index af6d52da9c4d..f3e643ef99a1 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -665,7 +665,8 @@ void CodeGenAction::generateLLVMIR() {
 
   // Create the pass pipeline
   fir::createMLIRToLLVMPassPipeline(pm, level, opts.StackArrays,
-                                    opts.Underscoring, opts.getDebugInfo());
+                                    opts.Underscoring, opts.LoopVersioning,
+                                    opts.getDebugInfo());
   (void)mlir::applyPassManagerCLOptions(pm);
 
   // run the pass manager
@@ -704,7 +705,6 @@ void CodeGenAction::generateLLVMIR() {
       llvmModule->setPIELevel(
           static_cast<llvm::PIELevel::Level>(opts.PICLevel));
   }
-
 }
 
 bool CodeGenAction::setUpTargetMachine() {

diff  --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90
index c365fbf16de3..68b106cbccbf 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -52,6 +52,8 @@
 ! CHECK-NEXT: -fno-integrated-as     Disable the integrated assembler
 ! CHECK-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! CHECK-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
+! CHECK-NEXT: -fno-version-loops-for-stride
+! CHECK-NEXT:                        Do not create unit-strided loops (default)
 ! CHECK-NEXT: -fopenacc              Enable OpenACC
 ! CHECK-NEXT: -fopenmp               Parse OpenMP pragmas and generate parallel code.
 ! CHECK-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
@@ -59,6 +61,8 @@
 ! CHECK-NEXT: -fstack-arrays         Attempt to allocate array temporaries on the stack, no matter their size
 ! CHECK-NEXT: -fsyntax-only          Run the preprocessor, parser and semantic analysis stages
 ! CHECK-NEXT: -funderscoring         Appends one trailing underscore to external names
+! CHECK-NEXT: -fversion-loops-for-stride
+! CHECK-NEXT:                        Create unit-strided versions of loops
 ! CHECK-NEXT: -fxor-operator         Enable .XOR. as a synonym of .NEQV.
 ! CHECK-NEXT: -gline-tables-only     Emit debug line number tables only
 ! CHECK-NEXT: -g                     Generate source-level debug information

diff  --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90
index 25871db04891..f57fd32b0838 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -48,6 +48,8 @@
 ! HELP-NEXT: -fno-integrated-as      Disable the integrated assembler
 ! HELP-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! HELP-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
+! HELP-NEXT: -fno-version-loops-for-stride
+! HELP-NEXT:                        Do not create unit-strided loops (default)
 ! HELP-NEXT: -fopenacc              Enable OpenACC
 ! HELP-NEXT: -fopenmp               Parse OpenMP pragmas and generate parallel code.
 ! HELP-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
@@ -55,6 +57,8 @@
 ! HELP-NEXT: -fstack-arrays         Attempt to allocate array temporaries on the stack, no matter their size
 ! HELP-NEXT: -fsyntax-only          Run the preprocessor, parser and semantic analysis stages
 ! HELP-NEXT: -funderscoring         Appends one trailing underscore to external names
+! HELP-NEXT: -fversion-loops-for-stride
+! HELP-NEXT:                        Create unit-strided versions of loops
 ! HELP-NEXT: -fxor-operator         Enable .XOR. as a synonym of .NEQV.
 ! HELP-NEXT: -gline-tables-only     Emit debug line number tables only
 ! HELP-NEXT: -g                     Generate source-level debug information
@@ -146,6 +150,8 @@
 ! HELP-FC1-NEXT: -fno-reformat          Dump the cooked character stream in -E mode
 ! HELP-FC1-NEXT: -fno-signed-zeros      Allow optimizations that ignore the sign of floating point zeros
 ! HELP-FC1-NEXT: -fno-stack-arrays      Allocate array temporaries on the heap (default)
+! HELP-FC1-NEXT: -fno-version-loops-for-stride
+! HELP-FC1-NEXT:                        Do not create unit-strided loops (default)
 ! HELP-FC1-NEXT: -fopenacc              Enable OpenACC
 ! HELP-FC1-NEXT: -fopenmp-is-device     Generate code only for an OpenMP target device.
 ! HELP-FC1-NEXT: -fopenmp-target-debug  Enable debugging in the OpenMP offloading device RTL
@@ -155,6 +161,8 @@
 ! HELP-FC1-NEXT: -fstack-arrays         Attempt to allocate array temporaries on the stack, no matter their size
 ! HELP-FC1-NEXT: -fsyntax-only          Run the preprocessor, parser and semantic analysis stages
 ! HELP-FC1-NEXT: -funderscoring         Appends one trailing underscore to external names
+! HELP-FC1-NEXT: -fversion-loops-for-stride
+! HELP-FC1-NEXT:                        Create unit-strided versions of loops
 ! HELP-FC1-NEXT: -fxor-operator         Enable .XOR. as a synonym of .NEQV.
 ! HELP-FC1-NEXT: -help                  Display available options
 ! HELP-FC1-NEXT: -init-only             Only execute frontend initialization

diff  --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90
index 7bc05420c537..ddee84bac106 100644
--- a/flang/test/Driver/frontend-forwarding.f90
+++ b/flang/test/Driver/frontend-forwarding.f90
@@ -15,6 +15,8 @@
 ! RUN:     -fassociative-math \
 ! RUN:     -freciprocal-math \
 ! RUN:     -fpass-plugin=Bye%pluginext \
+! RUN:     -fversion-loops-for-stride \
+! RUN:     -mllvm -print-before-all\
 ! RUN:     -mllvm -print-before-all \
 ! RUN:     -save-temps=obj \
 ! RUN:     -P \
@@ -34,5 +36,6 @@
 ! CHECK: "-freciprocal-math"
 ! CHECK: "-fconvert=little-endian"
 ! CHECK: "-fpass-plugin=Bye
+! CHECK: "-fversion-loops-for-stride"  
 ! CHECK: "-mllvm" "-print-before-all"
 ! CHECK: "-save-temps=obj"

diff  --git a/flang/test/Driver/version-loops.f90 b/flang/test/Driver/version-loops.f90
new file mode 100644
index 000000000000..b0fa01d57251
--- /dev/null
+++ b/flang/test/Driver/version-loops.f90
@@ -0,0 +1,54 @@
+! Test that flang-new forwards the -f{no-,}version-loops-for-stride 
+! options correctly to flang-new -fc1 for different variants of optimisation
+! and explicit flags.
+
+! RUN: %flang -### %s -o %t 2>&1   -O3 \
+! RUN:   | FileCheck %s
+  
+! RUN: %flang -### %s -o %t 2>&1 -O2 \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O2
+
+! RUN: %flang -### %s -o %t 2>&1  -O2 -fversion-loops-for-stride \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O2-with
+  
+! RUN: %flang -### %s -o %t 2>&1  -O4 \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O4
+  
+! RUN: %flang -### %s -o %t 2>&1  -Ofast \
+! RUN:   | FileCheck %s --check-prefix=CHECK-Ofast
+  
+! RUN: %flang -### %s -o %t 2>&1 -Ofast -fno-version-loops-for-stride \
+! RUN:   | FileCheck %s --check-prefix=CHECK-Ofast-no
+
+! RUN: %flang -### %s -o %t 2>&1 -O3 -fno-version-loops-for-stride \
+! RUN:   | FileCheck %s --check-prefix=CHECK-O3-no
+
+! CHECK: "{{.*}}flang-new" "-fc1"
+! CHECK-SAME: "-fversion-loops-for-stride"
+! CHECK-SAME: "-O3"
+
+! CHECK-O2: "{{.*}}flang-new" "-fc1"
+! CHECK-O2-NOT: "-fversion-loops-for-stride"
+! CHECK-O2-SAME: "-O2"  
+
+! CHECK-O2-with: "{{.*}}flang-new" "-fc1"
+! CHECK-O2-with-SAME: "-fversion-loops-for-stride"
+! CHECK-O2-with-SAME: "-O2"  
+  
+! CHECK-O4: "{{.*}}flang-new" "-fc1"
+! CHECK-O4-SAME: "-fversion-loops-for-stride"
+! CHECK-O4-SAME: "-O3"
+
+! CHECK-Ofast: "{{.*}}flang-new" "-fc1"
+! CHECK-Ofast-SAME: "-ffast-math"
+! CHECK-Ofast-SAME: "-fversion-loops-for-stride"
+! CHECK-Ofast-SAME: "-O3"
+
+! CHECK-Ofast-no: "{{.*}}flang-new" "-fc1"
+! CHECK-Ofast-no-SAME: "-ffast-math"
+! CHECK-Ofast-no-NOT: "-fversion-loops-for-stride"
+! CHECK-Ofast-no-SAME: "-O3"
+
+! CHECK-O3-no: "{{.*}}flang-new" "-fc1"
+! CHECK-O3-no-NOT: "-fversion-loops-for-stride"
+! CHECK-O3-no-SAME: "-O3"


        


More information about the cfe-commits mailing list