[flang-commits] [clang] [flang] [flang] Add -f[no-]vectorize flags (PR #119718)
David Truby via flang-commits
flang-commits at lists.llvm.org
Thu Feb 6 06:15:24 PST 2025
https://github.com/DavidTruby updated https://github.com/llvm/llvm-project/pull/119718
From 0dc613d94560cbe4e8a57eed35d985e9d6dae752 Mon Sep 17 00:00:00 2001
From: David Truby <david.truby at arm.com>
Date: Thu, 12 Dec 2024 14:50:19 +0000
Subject: [PATCH] [flang] Add -f[no-]vectorize flags
---
clang/include/clang/Driver/Driver.h | 38 ++---
clang/include/clang/Driver/Options.td | 11 +-
clang/lib/Driver/Driver.cpp | 133 +++++++++---------
clang/lib/Driver/ToolChains/Clang.cpp | 33 -----
clang/lib/Driver/ToolChains/CommonArgs.cpp | 87 ++++++++----
clang/lib/Driver/ToolChains/CommonArgs.h | 2 +
clang/lib/Driver/ToolChains/Flang.cpp | 10 ++
.../include/flang/Frontend/CodeGenOptions.def | 1 +
flang/lib/Frontend/CompilerInvocation.cpp | 4 +
flang/lib/Frontend/FrontendActions.cpp | 2 +
flang/test/Driver/optimization-remark.f90 | 22 +--
flang/test/Integration/unroll-loops.f90 | 4 +-
flang/test/Lower/HLFIR/unroll-loops.fir | 4 +-
13 files changed, 175 insertions(+), 176 deletions(-)
diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index f4a52cc529b79cd..bca9cfd85c367cd 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -55,12 +55,7 @@ class JobAction;
class ToolChain;
/// Describes the kind of LTO mode selected via -f(no-)?lto(=.*)? options.
-enum LTOKind {
- LTOK_None,
- LTOK_Full,
- LTOK_Thin,
- LTOK_Unknown
-};
+enum LTOKind { LTOK_None, LTOK_Full, LTOK_Thin, LTOK_Unknown };
/// Whether headers used to construct C++20 module units should be looked
/// up by the path supplied on the command line, or in the user or system
@@ -110,17 +105,9 @@ class Driver {
DXCMode
} Mode;
- enum SaveTempsMode {
- SaveTempsNone,
- SaveTempsCwd,
- SaveTempsObj
- } SaveTemps;
+ enum SaveTempsMode { SaveTempsNone, SaveTempsCwd, SaveTempsObj } SaveTemps;
- enum BitcodeEmbedMode {
- EmbedNone,
- EmbedMarker,
- EmbedBitcode
- } BitcodeEmbed;
+ enum BitcodeEmbedMode { EmbedNone, EmbedMarker, EmbedBitcode } BitcodeEmbed;
enum OffloadMode {
OffloadHostDevice,
@@ -166,9 +153,7 @@ class Driver {
};
// Diag - Forwarding function for diagnostics.
- DiagnosticBuilder Diag(unsigned DiagID) const {
- return Diags.Report(DiagID);
- }
+ DiagnosticBuilder Diag(unsigned DiagID) const { return Diags.Report(DiagID); }
// FIXME: Privatize once interface is stable.
public:
@@ -404,7 +389,6 @@ class Driver {
SmallString<128> &CrashDiagDir);
public:
-
/// Takes the path to a binary that's either in bin/ or lib/ and returns
/// the path to clang's resource directory.
static std::string GetResourcesPath(StringRef BinaryPath);
@@ -419,9 +403,7 @@ class Driver {
/// Name to use when invoking gcc/g++.
const std::string &getCCCGenericGCCName() const { return CCCGenericGCCName; }
- llvm::ArrayRef<std::string> getConfigFiles() const {
- return ConfigFiles;
- }
+ llvm::ArrayRef<std::string> getConfigFiles() const { return ConfigFiles; }
const llvm::opt::OptTable &getOpts() const { return getDriverOptTable(); }
@@ -447,9 +429,7 @@ class Driver {
std::string getTargetTriple() const { return TargetTriple; }
/// Get the path to the main clang executable.
- const char *getClangProgramPath() const {
- return ClangExecutable.c_str();
- }
+ const char *getClangProgramPath() const { return ClangExecutable.c_str(); }
bool isSaveTempsEnabled() const { return SaveTemps != SaveTempsNone; }
bool isSaveTempsObj() const { return SaveTemps == SaveTempsObj; }
@@ -561,8 +541,9 @@ class Driver {
/// This routine handles additional processing that must be done in addition
/// to just running the subprocesses, for example reporting errors, setting
/// up response files, removing temporary files, etc.
- int ExecuteCompilation(Compilation &C,
- SmallVectorImpl< std::pair<int, const Command *> > &FailingCommands);
+ int ExecuteCompilation(
+ Compilation &C,
+ SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands);
/// Contains the files in the compilation diagnostic report generated by
/// generateCompilationDiagnostics.
@@ -758,7 +739,6 @@ class Driver {
const CUIDOptions &getCUIDOpts() const { return CUIDOpts; }
private:
-
/// Tries to load options from configuration files.
///
/// \returns true if error occurred.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 77ca2d2aac31be1..3cc9492eac1c200 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3981,11 +3981,15 @@ defm assumptions : BoolFOption<"assumptions",
"Disable codegen and compile-time checks for C++23's [[assume]] attribute">,
PosFlag<SetTrue>>;
+
+let Visibility = [ClangOption, FlangOption] in {
def fvectorize : Flag<["-"], "fvectorize">, Group<f_Group>,
HelpText<"Enable the loop vectorization passes">;
def fno_vectorize : Flag<["-"], "fno-vectorize">, Group<f_Group>;
def : Flag<["-"], "ftree-vectorize">, Alias<fvectorize>;
def : Flag<["-"], "fno-tree-vectorize">, Alias<fno_vectorize>;
+}
+
def fslp_vectorize : Flag<["-"], "fslp-vectorize">, Group<f_Group>,
HelpText<"Enable the superword-level parallelism vectorization passes">;
def fno_slp_vectorize : Flag<["-"], "fno-slp-vectorize">, Group<f_Group>;
@@ -7315,6 +7319,10 @@ def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">,
def mlink_bitcode_file
: Separate<["-"], "mlink-bitcode-file">,
HelpText<"Link the given bitcode file before performing optimizations.">;
+
+def vectorize_loops : Flag<["-"], "vectorize-loops">,
+ HelpText<"Run the Loop vectorization passes">,
+ MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>;
} // let Visibility = [CC1Option, FC1Option]
let Visibility = [CC1Option] in {
@@ -7430,9 +7438,6 @@ defm link_builtin_bitcode_postopt: BoolMOption<"link-builtin-bitcode-postopt",
PosFlag<SetTrue, [], [ClangOption], "Link builtin bitcodes after the "
"optimization pipeline">,
NegFlag<SetFalse, [], [ClangOption]>>;
-def vectorize_loops : Flag<["-"], "vectorize-loops">,
- HelpText<"Run the Loop vectorization passes">,
- MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>;
def vectorize_slp : Flag<["-"], "vectorize-slp">,
HelpText<"Run the SLP vectorization passes">,
MarshallingInfoFlag<CodeGenOpts<"VectorizeSLP">>;
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 5a4737fb381e6a0..dc5424a454513a5 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -398,8 +398,7 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) ||
(PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) ||
- (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) ||
- CCGenDiagnostics) {
+ (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) || CCGenDiagnostics) {
FinalPhase = phases::Preprocess;
// --precompile only runs up to precompilation.
@@ -424,18 +423,18 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
(PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) {
FinalPhase = phases::Compile;
- // -S only runs up to the backend.
+ // -S only runs up to the backend.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
FinalPhase = phases::Backend;
- // -c compilation only runs up to the assembler.
+ // -c compilation only runs up to the assembler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
FinalPhase = phases::Assemble;
} else if ((PhaseArg = DAL.getLastArg(options::OPT_emit_interface_stubs))) {
FinalPhase = phases::IfsMerge;
- // Otherwise do everything.
+ // Otherwise do everything.
} else
FinalPhase = phases::Link;
@@ -568,8 +567,7 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
///
/// This routine provides the logic to compute a target triple from various
/// args passed to the driver and the default triple string.
-static llvm::Triple computeTargetTriple(const Driver &D,
- StringRef TargetTriple,
+static llvm::Triple computeTargetTriple(const Driver &D, StringRef TargetTriple,
const ArgList &Args,
StringRef DarwinArchName = "") {
// FIXME: Already done in Compilation *Driver::BuildCompilation
@@ -692,8 +690,8 @@ static llvm::Triple computeTargetTriple(const Driver &D,
// Handle -miamcu flag.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86)
- D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu"
- << Target.str();
+ D.Diag(diag::err_drv_unsupported_opt_for_target)
+ << "-miamcu" << Target.str();
if (A && !A->getOption().matches(options::OPT_m32))
D.Diag(diag::err_drv_argument_not_allowed_with)
@@ -1627,14 +1625,13 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) {
StringRef Name = A->getValue();
unsigned Model = llvm::StringSwitch<unsigned>(Name)
- .Case("off", EmbedNone)
- .Case("all", EmbedBitcode)
- .Case("bitcode", EmbedBitcode)
- .Case("marker", EmbedMarker)
- .Default(~0U);
+ .Case("off", EmbedNone)
+ .Case("all", EmbedBitcode)
+ .Case("bitcode", EmbedBitcode)
+ .Case("marker", EmbedMarker)
+ .Default(~0U);
if (Model == ~0U) {
- Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
- << Name;
+ Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name;
} else
BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
}
@@ -1863,7 +1860,7 @@ bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename,
size_t LineEnd = Data.find_first_of("\n", ParentProcPos);
if (LineEnd == StringRef::npos)
continue;
- StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim();
+ StringRef ParentProcess = Data.slice(ParentProcPos + 15, LineEnd).trim();
int OpenBracket = -1, CloseBracket = -1;
for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) {
if (ParentProcess[i] == '[')
@@ -1876,7 +1873,8 @@ bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename,
int CrashPID;
if (OpenBracket < 0 || CloseBracket < 0 ||
ParentProcess.slice(OpenBracket + 1, CloseBracket)
- .getAsInteger(10, CrashPID) || CrashPID != PID) {
+ .getAsInteger(10, CrashPID) ||
+ CrashPID != PID) {
continue;
}
@@ -2132,8 +2130,7 @@ void Driver::generateCompilationDiagnostics(
CrashDiagDir += "_<YYYY-MM-DD-HHMMSS>_<hostname>.crash";
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Crash backtrace is located in";
- Diag(clang::diag::note_drv_command_failed_diag_msg)
- << CrashDiagDir.str();
+ Diag(clang::diag::note_drv_command_failed_diag_msg) << CrashDiagDir.str();
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "(choose the .crash file that corresponds to your crash)";
}
@@ -2245,8 +2242,7 @@ void Driver::PrintHelp(bool ShowHidden) const {
std::string Usage = llvm::formatv("{0} [options] file...", Name).str();
getOpts().printHelp(llvm::outs(), Usage.c_str(), DriverTitle.c_str(),
- ShowHidden, /*ShowAllAliases=*/false,
- VisibilityMask);
+ ShowHidden, /*ShowAllAliases=*/false, VisibilityMask);
}
void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
@@ -2423,11 +2419,11 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
if (C.getArgs().hasArg(options::OPT_v)) {
if (!SystemConfigDir.empty())
- llvm::errs() << "System configuration file directory: "
- << SystemConfigDir << "\n";
+ llvm::errs() << "System configuration file directory: " << SystemConfigDir
+ << "\n";
if (!UserConfigDir.empty())
- llvm::errs() << "User configuration file directory: "
- << UserConfigDir << "\n";
+ llvm::errs() << "User configuration file directory: " << UserConfigDir
+ << "\n";
}
const ToolChain &TC = C.getDefaultToolChain();
@@ -2506,7 +2502,7 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
StringRef ProgName = A->getValue();
// Null program name cannot have a path.
- if (! ProgName.empty())
+ if (!ProgName.empty())
llvm::outs() << GetProgramPath(ProgName, TC);
llvm::outs() << "\n";
@@ -2740,7 +2736,7 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
// Add in arch bindings for every top level action, as well as lipo and
// dsymutil steps if needed.
- for (Action* Act : SingleActions) {
+ for (Action *Act : SingleActions) {
// Make sure we can lipo this kind of output. If not (and it is an actual
// output) then we disallow, since we can't create an output file with the
// right name without overwriting it. We could remove this oddity by just
@@ -2783,7 +2779,7 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
// Verify the debug info output.
if (Args.hasArg(options::OPT_verify_debug_info)) {
- Action* LastAction = Actions.back();
+ Action *LastAction = Actions.back();
Actions.pop_back();
Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>(
LastAction, types::TY_Nothing));
@@ -2964,7 +2960,8 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
Ty = TC.LookupTypeForExtension(Ext + 1);
if (Ty == types::TY_INVALID) {
- if (IsCLMode() && (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics))
+ if (IsCLMode() &&
+ (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics))
Ty = types::TY_CXX;
else if (CCCIsCPP() || CCGenDiagnostics)
Ty = types::TY_C;
@@ -3168,7 +3165,7 @@ class OffloadingActionBuilder final {
virtual void appendLinkDeviceActions(ActionList &AL) {}
/// Append linker host action generated by the builder.
- virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; }
+ virtual Action *appendLinkHostActions(ActionList &AL) { return nullptr; }
/// Append linker actions generated by the builder.
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
@@ -3837,15 +3834,15 @@ class OffloadingActionBuilder final {
for (auto &LI : DeviceLinkerInputs) {
types::ID Output = Args.hasArg(options::OPT_emit_llvm)
- ? types::TY_LLVM_BC
- : types::TY_Image;
+ ? types::TY_LLVM_BC
+ : types::TY_Image;
auto *DeviceLinkAction = C.MakeAction<LinkJobAction>(LI, Output);
// Linking all inputs for the current GPU arch.
// LI contains all the inputs for the linker.
OffloadAction::DeviceDependences DeviceLinkDeps;
- DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0],
- GpuArchList[I], AssociatedOffloadKind);
+ DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I],
+ AssociatedOffloadKind);
Actions.push_back(C.MakeAction<OffloadAction>(
DeviceLinkDeps, DeviceLinkAction->getType()));
++I;
@@ -3854,8 +3851,8 @@ class OffloadingActionBuilder final {
// If emitting LLVM, do not generate final host/device compilation action
if (Args.hasArg(options::OPT_emit_llvm)) {
- AL.append(Actions);
- return;
+ AL.append(Actions);
+ return;
}
// Create a host object from all the device images by embedding them
@@ -3876,7 +3873,7 @@ class OffloadingActionBuilder final {
}
}
- Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); }
+ Action *appendLinkHostActions(ActionList &AL) override { return AL.back(); }
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
};
@@ -4118,7 +4115,7 @@ class OffloadingActionBuilder final {
return nullptr;
// Let builders add host linking actions.
- Action* HA = nullptr;
+ Action *HA = nullptr;
for (DeviceActionBuilder *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
@@ -4213,7 +4210,8 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
getOpts().getOption(options::OPT_frtlib_add_rpath));
}
// Emitting LLVM while linking disabled except in HIPAMD Toolchain
- if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link))
+ if (Args.hasArg(options::OPT_emit_llvm) &&
+ !Args.hasArg(options::OPT_hip_link))
Diag(clang::diag::err_drv_emit_llvm_link);
if (C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment() &&
LTOMode != LTOK_None &&
@@ -5589,8 +5587,8 @@ class ToolSelector final {
continue;
}
- // This is legal to combine. Append any offload action we found and add the
- // current input to preprocessor inputs.
+ // This is legal to combine. Append any offload action we found and add
+ // the current input to preprocessor inputs.
CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(),
PreprocessJobOffloadActions.end());
NewInputs.append(PJ->input_begin(), PJ->input_end());
@@ -5613,8 +5611,7 @@ class ToolSelector final {
/// connected to collapsed actions are updated accordingly. The latter enables
/// the caller of the selector to process them afterwards instead of just
/// dropping them. If no suitable tool is found, null will be returned.
- const Tool *getTool(ActionList &Inputs,
- ActionList &CollapsedOffloadAction) {
+ const Tool *getTool(ActionList &Inputs, ActionList &CollapsedOffloadAction) {
//
// Get the largest chain of actions that we could combine.
//
@@ -5657,7 +5654,7 @@ class ToolSelector final {
return T;
}
};
-}
+} // namespace
/// Return a string that uniquely identifies the result of a job. The bound arch
/// is not necessarily represented in the toolchain's triple -- for example,
@@ -5828,9 +5825,9 @@ InputInfoList Driver::BuildJobsForActionNoCache(
StringRef ArchName = BAA->getArchName();
if (!ArchName.empty())
- TC = &getToolChain(C.getArgs(),
- computeTargetTriple(*this, TargetTriple,
- C.getArgs(), ArchName));
+ TC = &getToolChain(
+ C.getArgs(),
+ computeTargetTriple(*this, TargetTriple, C.getArgs(), ArchName));
else
TC = &C.getDefaultToolChain();
@@ -5839,7 +5836,6 @@ InputInfoList Driver::BuildJobsForActionNoCache(
TargetDeviceOffloadKind);
}
-
ActionList Inputs = A->getInputs();
const JobAction *JA = cast<JobAction>(A);
@@ -5973,10 +5969,11 @@ InputInfoList Driver::BuildJobsForActionNoCache(
/*CreatePrefixForHost=*/isa<OffloadPackagerJobAction>(A) ||
!(A->getOffloadingHostActiveKinds() == Action::OFK_None ||
AtTopLevel));
- Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
- AtTopLevel, MultipleArchs,
- OffloadingPrefix),
- BaseInput);
+ Result =
+ InputInfo(A,
+ GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel,
+ MultipleArchs, OffloadingPrefix),
+ BaseInput);
if (T->canEmitIR() && OffloadingPrefix.empty())
handleTimeTrace(C, Args, JA, BaseInput, Result);
}
@@ -6304,12 +6301,10 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
}
} else if (JA.getType() == types::TY_PCH && IsCLMode()) {
NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName));
- } else if ((JA.getType() == types::TY_Plist || JA.getType() == types::TY_AST) &&
+ } else if ((JA.getType() == types::TY_Plist ||
+ JA.getType() == types::TY_AST) &&
C.getArgs().hasArg(options::OPT__SLASH_o)) {
- StringRef Val =
- C.getArgs()
- .getLastArg(options::OPT__SLASH_o)
- ->getValue();
+ StringRef Val = C.getArgs().getLastArg(options::OPT__SLASH_o)->getValue();
NamedOutput =
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object);
} else {
@@ -6645,15 +6640,15 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
case llvm::Triple::Linux:
case llvm::Triple::ELFIAMCU:
if (Target.getArch() == llvm::Triple::hexagon)
- TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target,
- Args);
+ TC =
+ std::make_unique<toolchains::HexagonToolChain>(*this, Target, Args);
else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) &&
!Target.hasEnvironment())
TC = std::make_unique<toolchains::MipsLLVMToolChain>(*this, Target,
- Args);
+ Args);
else if (Target.isPPC())
TC = std::make_unique<toolchains::PPCLinuxToolChain>(*this, Target,
- Args);
+ Args);
else if (Target.getArch() == llvm::Triple::ve)
TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args);
else if (Target.isOHOSFamily())
@@ -6698,7 +6693,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
break;
case llvm::Triple::Itanium:
TC = std::make_unique<toolchains::CrossWindowsToolChain>(*this, Target,
- Args);
+ Args);
break;
case llvm::Triple::MSVC:
case llvm::Triple::UnknownEnvironment:
@@ -6707,8 +6702,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
TC = std::make_unique<toolchains::CrossWindowsToolChain>(
*this, Target, Args);
else
- TC =
- std::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
+ TC = std::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
break;
}
break;
@@ -6742,8 +6736,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
TC = std::make_unique<toolchains::TCELEToolChain>(*this, Target, Args);
break;
case llvm::Triple::hexagon:
- TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target,
- Args);
+ TC =
+ std::make_unique<toolchains::HexagonToolChain>(*this, Target, Args);
break;
case llvm::Triple::lanai:
TC = std::make_unique<toolchains::LanaiToolChain>(*this, Target, Args);
@@ -6759,8 +6753,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
TC = std::make_unique<toolchains::AVRToolChain>(*this, Target, Args);
break;
case llvm::Triple::msp430:
- TC =
- std::make_unique<toolchains::MSP430ToolChain>(*this, Target, Args);
+ TC = std::make_unique<toolchains::MSP430ToolChain>(*this, Target, Args);
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
@@ -6943,7 +6936,7 @@ Driver::getOptionVisibilityMask(bool UseDriverMode) const {
return llvm::opt::Visibility(options::CLOption);
if (IsDXCMode())
return llvm::opt::Visibility(options::DXCOption);
- if (IsFlangMode()) {
+ if (IsFlangMode()) {
return llvm::opt::Visibility(options::FlangOption);
}
return llvm::opt::Visibility(options::ClangOption);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a0757d71b140c20..2c98fc1d01feda9 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -511,39 +511,6 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
}
}
-/// Vectorize at all optimization levels greater than 1 except for -Oz.
-/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
-/// enabled.
-static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) {
- if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
- if (A->getOption().matches(options::OPT_O4) ||
- A->getOption().matches(options::OPT_Ofast))
- return true;
-
- if (A->getOption().matches(options::OPT_O0))
- return false;
-
- assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");
-
- // Vectorize -Os.
- StringRef S(A->getValue());
- if (S == "s")
- return true;
-
- // Don't vectorize -Oz, unless it's the slp vectorizer.
- if (S == "z")
- return isSlpVec;
-
- unsigned OptLevel = 0;
- if (S.getAsInteger(10, OptLevel))
- return false;
-
- return OptLevel > 1;
- }
-
- return false;
-}
-
/// Add -x lang to \p CmdArgs for \p Input.
static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
ArgStringList &CmdArgs) {
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 699aadec86dcba9..7c3eb8c9732366b 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -973,7 +973,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
// LowerMatrixIntrinsicsPass, which is transitively called by
// buildThinLTODefaultPipeline under EnableMatrix.
if ((IsThinLTO || IsFatLTO || IsUnifiedLTO) &&
- Args.hasArg(options::OPT_fenable_matrix))
+ Args.hasArg(options::OPT_fenable_matrix))
CmdArgs.push_back(
Args.MakeArgString(Twine(PluginOptPrefix) + "-enable-matrix"));
@@ -1284,7 +1284,7 @@ bool tools::addOpenMPRuntime(const Compilation &C, ArgStringList &CmdArgs,
CmdArgs.push_back("-Bdynamic");
if (RTKind == Driver::OMPRT_GOMP && GompNeedsRT)
- CmdArgs.push_back("-lrt");
+ CmdArgs.push_back("-lrt");
if (IsOffloadingHost)
CmdArgs.push_back("-lomptarget");
@@ -1379,10 +1379,12 @@ static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args,
bool IsShared, bool IsWhole) {
// Wrap any static runtimes that must be forced into executable in
// whole-archive.
- if (IsWhole) CmdArgs.push_back("--whole-archive");
+ if (IsWhole)
+ CmdArgs.push_back("--whole-archive");
CmdArgs.push_back(TC.getCompilerRTArgString(
Args, Sanitizer, IsShared ? ToolChain::FT_Shared : ToolChain::FT_Static));
- if (IsWhole) CmdArgs.push_back("--no-whole-archive");
+ if (IsWhole)
+ CmdArgs.push_back("--no-whole-archive");
if (IsShared) {
addArchSpecificRPath(TC, Args, CmdArgs);
@@ -1678,7 +1680,8 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
return !StaticRuntimes.empty() || !NonWholeStaticRuntimes.empty();
}
-bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringList &CmdArgs) {
+bool tools::addXRayRuntime(const ToolChain &TC, const ArgList &Args,
+ ArgStringList &CmdArgs) {
if (Args.hasArg(options::OPT_shared)) {
if (TC.getXRayArgs().needsXRayDSORt()) {
CmdArgs.push_back("--whole-archive");
@@ -1707,8 +1710,7 @@ void tools::linkXRayRuntimeDeps(const ToolChain &TC,
CmdArgs.push_back("-lrt");
CmdArgs.push_back("-lm");
- if (!TC.getTriple().isOSFreeBSD() &&
- !TC.getTriple().isOSNetBSD() &&
+ if (!TC.getTriple().isOSFreeBSD() && !TC.getTriple().isOSNetBSD() &&
!TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-ldl");
}
@@ -1983,19 +1985,19 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) {
bool EmbeddedPISupported;
switch (Triple.getArch()) {
- case llvm::Triple::arm:
- case llvm::Triple::armeb:
- case llvm::Triple::thumb:
- case llvm::Triple::thumbeb:
- EmbeddedPISupported = true;
- break;
- default:
- EmbeddedPISupported = false;
- break;
+ case llvm::Triple::arm:
+ case llvm::Triple::armeb:
+ case llvm::Triple::thumb:
+ case llvm::Triple::thumbeb:
+ EmbeddedPISupported = true;
+ break;
+ default:
+ EmbeddedPISupported = false;
+ break;
}
bool ROPI = false, RWPI = false;
- Arg* LastROPIArg = Args.getLastArg(options::OPT_fropi, options::OPT_fno_ropi);
+ Arg *LastROPIArg = Args.getLastArg(options::OPT_fropi, options::OPT_fno_ropi);
if (LastROPIArg && LastROPIArg->getOption().matches(options::OPT_fropi)) {
if (!EmbeddedPISupported)
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
@@ -2024,7 +2026,7 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) {
if (ABIName == "n64")
PIC = true;
// When targettng MIPS with -mno-abicalls, it's always static.
- if(Args.hasArg(options::OPT_mno_abicalls))
+ if (Args.hasArg(options::OPT_mno_abicalls))
return std::make_tuple(llvm::Reloc::Static, 0U, false);
// Unlike other architectures, MIPS, even with -fPIC/-mxgot/multigot,
// does not use PIC level 2 for historical reasons.
@@ -2190,7 +2192,8 @@ enum class LibGccType { UnspecifiedLibGcc, StaticLibGcc, SharedLibGcc };
static LibGccType getLibGccType(const ToolChain &TC, const Driver &D,
const ArgList &Args) {
if (Args.hasArg(options::OPT_static_libgcc) ||
- Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_static_pie) ||
+ Args.hasArg(options::OPT_static) ||
+ Args.hasArg(options::OPT_static_pie) ||
// The Android NDK only provides libunwind.a, not libunwind.so.
TC.getTriple().isAndroid())
return LibGccType::StaticLibGcc;
@@ -2558,11 +2561,10 @@ static void GetSDLFromOffloadArchive(
return;
StringRef Prefix = isBitCodeSDL ? "libbc-" : "lib";
- std::string OutputLib =
- D.GetTemporaryPath(Twine(Prefix + llvm::sys::path::filename(Lib) + "-" +
- Arch + "-" + Target)
- .str(),
- "a");
+ std::string OutputLib = D.GetTemporaryPath(
+ Twine(Prefix + llvm::sys::path::filename(Lib) + "-" + Arch + "-" + Target)
+ .str(),
+ "a");
C.addTempFile(C.getArgs().MakeArgString(OutputLib));
@@ -2775,8 +2777,8 @@ void tools::addMachineOutlinerArgs(const Driver &D,
}
};
- if (Arg *A = Args.getLastArg(options::OPT_moutline,
- options::OPT_mno_outline)) {
+ if (Arg *A =
+ Args.getLastArg(options::OPT_moutline, options::OPT_mno_outline)) {
if (A->getOption().matches(options::OPT_moutline)) {
// We only support -moutline in AArch64 and ARM targets right now. If
// we're not compiling for these, emit a warning and ignore the flag.
@@ -3115,3 +3117,36 @@ void tools::renderCommonIntegerOverflowOptions(const ArgList &Args,
CmdArgs.push_back("-fwrapv-pointer");
}
}
+
+/// Vectorize at all optimization levels greater than 1 except for -Oz.
+/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
+/// enabled.
+bool tools::shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) {
+ if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
+ if (A->getOption().matches(options::OPT_O4) ||
+ A->getOption().matches(options::OPT_Ofast))
+ return true;
+
+ if (A->getOption().matches(options::OPT_O0))
+ return false;
+
+ assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");
+
+ // Vectorize -Os.
+ StringRef S(A->getValue());
+ if (S == "s")
+ return true;
+
+ // Don't vectorize -Oz, unless it's the slp vectorizer.
+ if (S == "z")
+ return isSlpVec;
+
+ unsigned OptLevel = 0;
+ if (S.getAsInteger(10, OptLevel))
+ return false;
+
+ return OptLevel > 1;
+ }
+
+ return false;
+}
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h
index b6ddd99b872798e..783a1f834b33d73 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.h
+++ b/clang/lib/Driver/ToolChains/CommonArgs.h
@@ -265,6 +265,8 @@ bool shouldRecordCommandLine(const ToolChain &TC,
void renderCommonIntegerOverflowOptions(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs);
+bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args,
+ bool isSlpVec);
} // end namespace tools
} // end namespace driver
} // end namespace clang
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index e4019c434968744..1fe2d30da72aaf5 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -149,6 +149,16 @@ void Flang::addCodegenOptions(const ArgList &Args,
!stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
CmdArgs.push_back("-fstack-arrays");
+ // Enable vectorization per default according to the optimization level
+ // selected. For optimization levels that want vectorization we use the alias
+ // option to simplify the hasFlag logic.
+ bool enableVec = shouldEnableVectorizerAtOLevel(Args, false);
+ OptSpecifier vectorizeAliasOption =
+ enableVec ? options::OPT_O_Group : options::OPT_fvectorize;
+ if (Args.hasFlag(options::OPT_fvectorize, vectorizeAliasOption,
+ options::OPT_fno_vectorize, enableVec))
+ CmdArgs.push_back("-vectorize-loops");
+
if (shouldLoopVersion(Args))
CmdArgs.push_back("-fversion-loops-for-stride");
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index deb8d1aede518b3..44cb5a2cdd497ad 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the
CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
///< compile step.
CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
+CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index a2c1d3efef6cf3c..66717d7cf1ad476 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -23,6 +23,7 @@
#include "clang/Basic/AllDiagnostics.h"
#include "clang/Basic/DiagnosticDriver.h"
#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/OptionUtils.h"
#include "clang/Driver/Options.h"
@@ -242,6 +243,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
clang::driver::options::OPT_fno_stack_arrays, false))
opts.StackArrays = 1;
+ if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
+ opts.VectorizeLoop = 1;
+
if (args.hasFlag(clang::driver::options::OPT_floop_versioning,
clang::driver::options::OPT_fno_loop_versioning, false))
opts.LoopVersioning = 1;
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index b0545a7ac2f99a7..af32eba56120b45 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -1030,6 +1030,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
pto.LoopUnrolling = opts.UnrollLoops;
pto.LoopInterleaving = opts.UnrollLoops;
+ pto.LoopVectorization = opts.VectorizeLoop;
+
llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic);
// Attempt to load pass plugins and register their callbacks with PB.
diff --git a/flang/test/Driver/optimization-remark.f90 b/flang/test/Driver/optimization-remark.f90
index e90baa892f46a01..90e310d36c807e9 100644
--- a/flang/test/Driver/optimization-remark.f90
+++ b/flang/test/Driver/optimization-remark.f90
@@ -5,33 +5,33 @@
! DEFINE: %{output} = -emit-llvm -flang-deprecated-no-hlfir -o /dev/null 2>&1
! Check fc1 can handle -Rpass
-! RUN: %flang_fc1 %s -O1 -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS
+! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS
! Check that we can override -Rpass= with -Rno-pass.
-! RUN: %flang_fc1 %s -O1 -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
+! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
! Check -Rno-pass, -Rno-pass-analysis, -Rno-pass-missed nothing emitted
-! RUN: %flang %s -O1 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
-! RUN: %flang %s -O1 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
-! RUN: %flang %s -O1 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
+! RUN: %flang %s -O2 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
+! RUN: %flang %s -O2 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
+! RUN: %flang %s -O2 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
! Check valid -Rpass regex
-! RUN: %flang %s -O1 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY
+! RUN: %flang %s -O2 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY
! Check valid -Rpass-missed regex
-! RUN: %flang %s -O1 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY
+! RUN: %flang %s -O2 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY
! Check valid -Rpass-analysis regex
-! RUN: %flang %s -O1 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY
+! RUN: %flang %s -O2 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY
! Check full -Rpass message is emitted
-! RUN: %flang %s -O1 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS
+! RUN: %flang %s -O2 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS
! Check full -Rpass-missed message is emitted
-! RUN: %flang %s -O1 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED
+! RUN: %flang %s -O2 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED
! Check full -Rpass-analysis message is emitted
-! RUN: %flang %s -O1 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS
+! RUN: %flang %s -O2 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS
! REMARKS: remark:
! NO-REMARKS-NOT: remark:
diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90
index c3fcf1c3a7cf3af..debe45e0ec35957 100644
--- a/flang/test/Integration/unroll-loops.f90
+++ b/flang/test/Integration/unroll-loops.f90
@@ -1,8 +1,8 @@
! FIXME: https://github.com/llvm/llvm-project/issues/123668
!
! DEFINE: %{triple} =
-! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
-! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
+! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
+! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
!
! REDEFINE: %{triple} = aarch64-unknown-linux-gnu
! RUN: %if aarch64-registered-target %{ %{check-unroll} %}
diff --git a/flang/test/Lower/HLFIR/unroll-loops.fir b/flang/test/Lower/HLFIR/unroll-loops.fir
index 6a9dd28a37b6d92..1321f39677405b9 100644
--- a/flang/test/Lower/HLFIR/unroll-loops.fir
+++ b/flang/test/Lower/HLFIR/unroll-loops.fir
@@ -1,6 +1,6 @@
// DEFINE: %{triple} =
-// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
-// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
+// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
+// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
// REDEFINE: %{triple} = aarch64-unknown-linux-gnu
// RUN: %if aarch64-registered-target %{ %{check-unroll} %}
More information about the flang-commits mailing list