[flang-commits] [clang] [flang] [flang] Add -f[no-]vectorize flags (PR #119718)
David Truby via flang-commits
flang-commits at lists.llvm.org
Thu Dec 12 08:36:59 PST 2024
https://github.com/DavidTruby created https://github.com/llvm/llvm-project/pull/119718
This patch adds the -fvectorize and -fno-vectorize flags to flang.
Note that this also changes the behaviour of `flang -fc1` to match that of `clang -cc1`, which is that vectorization is only enabled in the presence of the `-vectorize-loops` flag.
Additionally, this patch changes the behaviour of the default optimisation levels to match clang, such that vectorization only happens at the same levels as it does there.
This patch is in draft while I write an RFC to discuss the above two changes.
>From 0b9cbef8073cdfc511087c315deb82a90c408640 Mon Sep 17 00:00:00 2001
From: David Truby <david.truby at arm.com>
Date: Thu, 12 Dec 2024 14:50:19 +0000
Subject: [PATCH] [flang] Add -f[no-]vectorize flags
---
clang/include/clang/Driver/Driver.h | 3 ++
clang/include/clang/Driver/Options.td | 11 ++++--
clang/lib/Driver/Driver.cpp | 34 +++++++++++++++++++
clang/lib/Driver/ToolChains/Clang.cpp | 33 ------------------
clang/lib/Driver/ToolChains/Flang.cpp | 10 ++++++
.../include/flang/Frontend/CodeGenOptions.def | 1 +
flang/lib/Frontend/CompilerInvocation.cpp | 4 +++
flang/lib/Frontend/FrontendActions.cpp | 3 ++
flang/test/Driver/optimization-remark.f90 | 22 ++++++------
9 files changed, 74 insertions(+), 47 deletions(-)
diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index c23d037e725bb9..4c4375b25902c4 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -856,6 +856,9 @@ void applyOverrideOptions(SmallVectorImpl<const char *> &Args,
llvm::StringSet<> &SavedStrings,
raw_ostream *OS = nullptr);
+bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args,
+ bool isSlpVec);
+
} // end namespace driver
} // end namespace clang
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 88862ae9edb29d..bc3c1e4a0045ff 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4038,11 +4038,15 @@ defm assumptions : BoolFOption<"assumptions",
"Disable codegen and compile-time checks for C++23's [[assume]] attribute">,
PosFlag<SetTrue>>;
+
+let Visibility = [ClangOption, FlangOption] in {
def fvectorize : Flag<["-"], "fvectorize">, Group<f_Group>,
HelpText<"Enable the loop vectorization passes">;
def fno_vectorize : Flag<["-"], "fno-vectorize">, Group<f_Group>;
def : Flag<["-"], "ftree-vectorize">, Alias<fvectorize>;
def : Flag<["-"], "fno-tree-vectorize">, Alias<fno_vectorize>;
+}
+
def fslp_vectorize : Flag<["-"], "fslp-vectorize">, Group<f_Group>,
HelpText<"Enable the superword-level parallelism vectorization passes">;
def fno_slp_vectorize : Flag<["-"], "fno-slp-vectorize">, Group<f_Group>;
@@ -7323,6 +7327,10 @@ let Visibility = [CC1Option, FC1Option] in {
def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">,
HelpText<"Link and internalize needed symbols from the given bitcode file "
"before performing optimizations.">;
+
+def vectorize_loops : Flag<["-"], "vectorize-loops">,
+ HelpText<"Run the Loop vectorization passes">,
+ MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>;
} // let Visibility = [CC1Option, FC1Option]
let Visibility = [CC1Option] in {
@@ -7439,9 +7447,6 @@ defm link_builtin_bitcode_postopt: BoolMOption<"link-builtin-bitcode-postopt",
PosFlag<SetTrue, [], [ClangOption], "Link builtin bitcodes after the "
"optimization pipeline">,
NegFlag<SetFalse, [], [ClangOption]>>;
-def vectorize_loops : Flag<["-"], "vectorize-loops">,
- HelpText<"Run the Loop vectorization passes">,
- MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>;
def vectorize_slp : Flag<["-"], "vectorize-slp">,
HelpText<"Run the SLP vectorization passes">,
MarshallingInfoFlag<CodeGenOpts<"VectorizeSLP">>;
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index fb73b62cf2daed..1510af0047fc47 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -6980,3 +6980,37 @@ void driver::applyOverrideOptions(SmallVectorImpl<const char *> &Args,
++S;
}
}
+
+/// Vectorize at all optimization levels greater than 1 except for -Oz.
+/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
+/// enabled.
+bool driver::shouldEnableVectorizerAtOLevel(const ArgList &Args,
+ bool isSlpVec) {
+ if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
+ if (A->getOption().matches(options::OPT_O4) ||
+ A->getOption().matches(options::OPT_Ofast))
+ return true;
+
+ if (A->getOption().matches(options::OPT_O0))
+ return false;
+
+ assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");
+
+ // Vectorize -Os.
+ StringRef S(A->getValue());
+ if (S == "s")
+ return true;
+
+ // Don't vectorize -Oz, unless it's the slp vectorizer.
+ if (S == "z")
+ return isSlpVec;
+
+ unsigned OptLevel = 0;
+ if (S.getAsInteger(10, OptLevel))
+ return false;
+
+ return OptLevel > 1;
+ }
+
+ return false;
+}
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index d3206c3e8e25ed..69fdec4e74e08a 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -503,39 +503,6 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
}
}
-/// Vectorize at all optimization levels greater than 1 except for -Oz.
-/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
-/// enabled.
-static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) {
- if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
- if (A->getOption().matches(options::OPT_O4) ||
- A->getOption().matches(options::OPT_Ofast))
- return true;
-
- if (A->getOption().matches(options::OPT_O0))
- return false;
-
- assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");
-
- // Vectorize -Os.
- StringRef S(A->getValue());
- if (S == "s")
- return true;
-
- // Don't vectorize -Oz, unless it's the slp vectorizer.
- if (S == "z")
- return isSlpVec;
-
- unsigned OptLevel = 0;
- if (S.getAsInteger(10, OptLevel))
- return false;
-
- return OptLevel > 1;
- }
-
- return false;
-}
-
/// Add -x lang to \p CmdArgs for \p Input.
static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
ArgStringList &CmdArgs) {
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index c98fdbd157bac8..5b229562583ff5 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -143,6 +143,16 @@ void Flang::addCodegenOptions(const ArgList &Args,
!stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
CmdArgs.push_back("-fstack-arrays");
+ // Enable vectorization per default according to the optimization level
+ // selected. For optimization levels that want vectorization we use the alias
+ // option to simplify the hasFlag logic.
+ bool enableVec = shouldEnableVectorizerAtOLevel(Args, false);
+ OptSpecifier vectorizeAliasOption =
+ enableVec ? options::OPT_O_Group : options::OPT_fvectorize;
+ if (Args.hasFlag(options::OPT_fvectorize, vectorizeAliasOption,
+ options::OPT_fno_vectorize, enableVec))
+ CmdArgs.push_back("-vectorize-loops");
+
if (shouldLoopVersion(Args))
CmdArgs.push_back("-fversion-loops-for-stride");
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index 9d03ec88a56b8a..b5d6bc6146e92b 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the
CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
///< compile step.
CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
+CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 648b88e84051c2..b517e86ca8c9a1 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -23,6 +23,7 @@
#include "clang/Basic/AllDiagnostics.h"
#include "clang/Basic/DiagnosticDriver.h"
#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/OptionUtils.h"
#include "clang/Driver/Options.h"
@@ -242,6 +243,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
clang::driver::options::OPT_fno_stack_arrays, false))
opts.StackArrays = 1;
+ if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
+ opts.VectorizeLoop = 1;
+
if (args.hasFlag(clang::driver::options::OPT_floop_versioning,
clang::driver::options::OPT_fno_loop_versioning, false))
opts.LoopVersioning = 1;
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 77631f70dfd19f..3801408cf7967d 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -982,6 +982,9 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
llvm::StandardInstrumentations si(llvmModule->getContext(),
opts.DebugPassManager);
si.registerCallbacks(pic, &mam);
+
+ pto.LoopVectorization = opts.VectorizeLoop;
+
llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic);
// Attempt to load pass plugins and register their callbacks with PB.
diff --git a/flang/test/Driver/optimization-remark.f90 b/flang/test/Driver/optimization-remark.f90
index e90baa892f46a0..90e310d36c807e 100644
--- a/flang/test/Driver/optimization-remark.f90
+++ b/flang/test/Driver/optimization-remark.f90
@@ -5,33 +5,33 @@
! DEFINE: %{output} = -emit-llvm -flang-deprecated-no-hlfir -o /dev/null 2>&1
! Check fc1 can handle -Rpass
-! RUN: %flang_fc1 %s -O1 -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS
+! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS
! Check that we can override -Rpass= with -Rno-pass.
-! RUN: %flang_fc1 %s -O1 -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
+! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
! Check -Rno-pass, -Rno-pass-analysis, -Rno-pass-missed nothing emitted
-! RUN: %flang %s -O1 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
-! RUN: %flang %s -O1 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
-! RUN: %flang %s -O1 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
+! RUN: %flang %s -O2 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
+! RUN: %flang %s -O2 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
+! RUN: %flang %s -O2 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
! Check valid -Rpass regex
-! RUN: %flang %s -O1 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY
+! RUN: %flang %s -O2 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY
! Check valid -Rpass-missed regex
-! RUN: %flang %s -O1 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY
+! RUN: %flang %s -O2 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY
! Check valid -Rpass-analysis regex
-! RUN: %flang %s -O1 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY
+! RUN: %flang %s -O2 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY
! Check full -Rpass message is emitted
-! RUN: %flang %s -O1 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS
+! RUN: %flang %s -O2 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS
! Check full -Rpass-missed message is emitted
-! RUN: %flang %s -O1 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED
+! RUN: %flang %s -O2 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED
! Check full -Rpass-analysis message is emitted
-! RUN: %flang %s -O1 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS
+! RUN: %flang %s -O2 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS
! REMARKS: remark:
! NO-REMARKS-NOT: remark:
More information about the flang-commits
mailing list