[flang-commits] [clang] [flang] [mlir] Add basic -mtune support (PR #98517)
Alexis Perry-Holby via flang-commits
flang-commits at lists.llvm.org
Thu Jul 11 12:15:31 PDT 2024
https://github.com/AlexisPerry updated https://github.com/llvm/llvm-project/pull/98517
>From 2e26f0f66f070cd0b684531efc63e63e2e584dfa Mon Sep 17 00:00:00 2001
From: Alexis Perry-Holby <aperry at lanl.gov>
Date: Thu, 11 Jul 2024 12:51:39 -0600
Subject: [PATCH 1/2] Add basic -mtune support
Initial implementation for the -mtune flag in Flang.
---
clang/include/clang/Driver/Options.td | 7 +++---
clang/lib/Driver/ToolChains/Flang.cpp | 10 +++++++-
flang/include/flang/Frontend/TargetOptions.h | 3 +++
flang/include/flang/Lower/Bridge.h | 6 ++---
.../flang/Optimizer/CodeGen/CGPasses.td | 4 +++
.../include/flang/Optimizer/CodeGen/Target.h | 21 ++++++++++++++--
.../Optimizer/Dialect/Support/FIRContext.h | 7 ++++++
.../flang/Optimizer/Transforms/Passes.td | 5 +++-
flang/lib/Frontend/CompilerInvocation.cpp | 4 +++
flang/lib/Frontend/FrontendActions.cpp | 3 ++-
flang/lib/Lower/Bridge.cpp | 3 ++-
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 6 ++++-
flang/lib/Optimizer/CodeGen/Target.cpp | 11 ++++++++
flang/lib/Optimizer/CodeGen/TargetRewrite.cpp | 12 ++++++++-
flang/lib/Optimizer/CodeGen/TypeConverter.cpp | 3 ++-
.../Optimizer/Dialect/Support/FIRContext.cpp | 18 +++++++++++++
flang/test/Driver/tune-cpu-fir.f90 | 25 +++++++++++++++++++
flang/test/Lower/tune-cpu-llvm.f90 | 8 ++++++
flang/tools/bbc/bbc.cpp | 3 ++-
flang/tools/tco/tco.cpp | 4 +++
flang/unittests/Optimizer/FIRContextTest.cpp | 5 +++-
mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 1 +
mlir/lib/Target/LLVMIR/ModuleImport.cpp | 5 ++++
mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 3 +++
mlir/test/Target/LLVMIR/Import/tune-cpu.ll | 16 ++++++++++++
mlir/test/Target/LLVMIR/tune-cpu.mlir | 14 +++++++++++
26 files changed, 190 insertions(+), 17 deletions(-)
create mode 100644 flang/test/Driver/tune-cpu-fir.f90
create mode 100644 flang/test/Lower/tune-cpu-llvm.f90
create mode 100644 mlir/test/Target/LLVMIR/Import/tune-cpu.ll
create mode 100644 mlir/test/Target/LLVMIR/tune-cpu.mlir
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index cfb37b3c5b474..8d49a4708aaf0 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5436,6 +5436,7 @@ def module_file_info : Flag<["-"], "module-file-info">, Flags<[]>,
HelpText<"Provide information about a particular module file">;
def mthumb : Flag<["-"], "mthumb">, Group<m_Group>;
def mtune_EQ : Joined<["-"], "mtune=">, Group<m_Group>,
+ Visibility<[ClangOption, FlangOption]>,
HelpText<"Only supported on AArch64, PowerPC, RISC-V, SPARC, SystemZ, and X86">;
def multi__module : Flag<["-"], "multi_module">;
def multiply__defined__unused : Separate<["-"], "multiply_defined_unused">;
@@ -6760,9 +6761,6 @@ def emit_hlfir : Flag<["-"], "emit-hlfir">, Group<Action_Group>,
let Visibility = [CC1Option, CC1AsOption] in {
-def tune_cpu : Separate<["-"], "tune-cpu">,
- HelpText<"Tune for a specific cpu type">,
- MarshallingInfoString<TargetOpts<"TuneCPU">>;
def target_abi : Separate<["-"], "target-abi">,
HelpText<"Target a particular ABI type">,
MarshallingInfoString<TargetOpts<"ABI">>;
@@ -6789,6 +6787,9 @@ def darwin_target_variant_triple : Separate<["-"], "darwin-target-variant-triple
let Visibility = [CC1Option, CC1AsOption, FC1Option] in {
+def tune_cpu : Separate<["-"], "tune-cpu">,
+ HelpText<"Tune for a specific cpu type">,
+ MarshallingInfoString<TargetOpts<"TuneCPU">>;
def target_cpu : Separate<["-"], "target-cpu">,
HelpText<"Target a specific cpu type">,
MarshallingInfoString<TargetOpts<"CPU">>;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index ee8292a508f93..7e42bad258cc6 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -15,6 +15,7 @@
#include "llvm/Frontend/Debug/Options.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
+#include "llvm/TargetParser/Host.h"
#include "llvm/TargetParser/RISCVISAInfo.h"
#include "llvm/TargetParser/RISCVTargetParser.h"
@@ -411,6 +412,13 @@ void Flang::addTargetOptions(const ArgList &Args,
}
// TODO: Add target specific flags, ABI, mtune option etc.
+ if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) {
+ CmdArgs.push_back("-tune-cpu");
+ if (A->getValue() == StringRef{"native"})
+ CmdArgs.push_back(Args.MakeArgString(llvm::sys::getHostCPUName()));
+ else
+ CmdArgs.push_back(A->getValue());
+ }
}
void Flang::addOffloadOptions(Compilation &C, const InputInfoList &Inputs,
@@ -807,7 +815,7 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
case CodeGenOptions::FramePointerKind::None:
FPKeepKindStr = "-mframe-pointer=none";
break;
- case CodeGenOptions::FramePointerKind::Reserved:
+ case CodeGenOptions::FramePointerKind::Reserved:
FPKeepKindStr = "-mframe-pointer=reserved";
break;
case CodeGenOptions::FramePointerKind::NonLeaf:
diff --git a/flang/include/flang/Frontend/TargetOptions.h b/flang/include/flang/Frontend/TargetOptions.h
index ef5d270a2185d..fa72c77a028a1 100644
--- a/flang/include/flang/Frontend/TargetOptions.h
+++ b/flang/include/flang/Frontend/TargetOptions.h
@@ -32,6 +32,9 @@ class TargetOptions {
/// If given, the name of the target CPU to generate code for.
std::string cpu;
+ /// If given, the name of the target CPU to tune code for.
+ std::string cpuToTuneFor;
+
/// The list of target specific features to enable or disable, as written on
/// the command line.
std::vector<std::string> featuresAsWritten;
diff --git a/flang/include/flang/Lower/Bridge.h b/flang/include/flang/Lower/Bridge.h
index 52110b861b680..4379ed512cdf0 100644
--- a/flang/include/flang/Lower/Bridge.h
+++ b/flang/include/flang/Lower/Bridge.h
@@ -65,11 +65,11 @@ class LoweringBridge {
const Fortran::lower::LoweringOptions &loweringOptions,
const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults,
const Fortran::common::LanguageFeatureControl &languageFeatures,
- const llvm::TargetMachine &targetMachine) {
+ const llvm::TargetMachine &targetMachine, llvm::StringRef tuneCPU) {
return LoweringBridge(ctx, semanticsContext, defaultKinds, intrinsics,
targetCharacteristics, allCooked, triple, kindMap,
loweringOptions, envDefaults, languageFeatures,
- targetMachine);
+ targetMachine, tuneCPU);
}
//===--------------------------------------------------------------------===//
@@ -148,7 +148,7 @@ class LoweringBridge {
const Fortran::lower::LoweringOptions &loweringOptions,
const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults,
const Fortran::common::LanguageFeatureControl &languageFeatures,
- const llvm::TargetMachine &targetMachine);
+ const llvm::TargetMachine &targetMachine, const llvm::StringRef tuneCPU);
LoweringBridge() = delete;
LoweringBridge(const LoweringBridge &) = delete;
diff --git a/flang/include/flang/Optimizer/CodeGen/CGPasses.td b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
index 9a4d327b33bad..989e3943882a1 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGPasses.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
@@ -31,6 +31,8 @@ def FIRToLLVMLowering : Pass<"fir-to-llvm-ir", "mlir::ModuleOp"> {
"Override module's data layout.">,
Option<"forcedTargetCPU", "target-cpu", "std::string", /*default=*/"",
"Override module's target CPU.">,
+ Option<"forcedTuneCPU", "tune-cpu", "std::string", /*default=*/"",
+ "Override module's tune CPU.">,
Option<"forcedTargetFeatures", "target-features", "std::string",
/*default=*/"", "Override module's target features.">,
Option<"applyTBAA", "apply-tbaa", "bool", /*default=*/"false",
@@ -68,6 +70,8 @@ def TargetRewritePass : Pass<"target-rewrite", "mlir::ModuleOp"> {
"Override module's target triple.">,
Option<"forcedTargetCPU", "target-cpu", "std::string", /*default=*/"",
"Override module's target CPU.">,
+ Option<"forcedTuneCPU", "tune-cpu", "std::string", /*default=*/"",
+ "Override module's tune CPU.">,
Option<"forcedTargetFeatures", "target-features", "std::string",
/*default=*/"", "Override module's target features.">,
Option<"noCharacterConversion", "no-character-conversion",
diff --git a/flang/include/flang/Optimizer/CodeGen/Target.h b/flang/include/flang/Optimizer/CodeGen/Target.h
index 3cf6a74a9adb7..2b3b2152ac80c 100644
--- a/flang/include/flang/Optimizer/CodeGen/Target.h
+++ b/flang/include/flang/Optimizer/CodeGen/Target.h
@@ -76,6 +76,11 @@ class CodeGenSpecifics {
llvm::StringRef targetCPU, mlir::LLVM::TargetFeaturesAttr targetFeatures,
const mlir::DataLayout &dl);
+ static std::unique_ptr<CodeGenSpecifics>
+ get(mlir::MLIRContext *ctx, llvm::Triple &&trp, KindMapping &&kindMap,
+ llvm::StringRef targetCPU, mlir::LLVM::TargetFeaturesAttr targetFeatures,
+ const mlir::DataLayout &dl, llvm::StringRef tuneCPU);
+
static TypeAndAttr getTypeAndAttr(mlir::Type t) { return TypeAndAttr{t, {}}; }
CodeGenSpecifics(mlir::MLIRContext *ctx, llvm::Triple &&trp,
@@ -83,7 +88,17 @@ class CodeGenSpecifics {
mlir::LLVM::TargetFeaturesAttr targetFeatures,
const mlir::DataLayout &dl)
: context{*ctx}, triple{std::move(trp)}, kindMap{std::move(kindMap)},
- targetCPU{targetCPU}, targetFeatures{targetFeatures}, dataLayout{&dl} {}
+ targetCPU{targetCPU}, targetFeatures{targetFeatures}, dataLayout{&dl},
+ tuneCPU{""} {}
+
+ CodeGenSpecifics(mlir::MLIRContext *ctx, llvm::Triple &&trp,
+ KindMapping &&kindMap, llvm::StringRef targetCPU,
+ mlir::LLVM::TargetFeaturesAttr targetFeatures,
+ const mlir::DataLayout &dl, llvm::StringRef tuneCPU)
+ : context{*ctx}, triple{std::move(trp)}, kindMap{std::move(kindMap)},
+ targetCPU{targetCPU}, targetFeatures{targetFeatures}, dataLayout{&dl},
+ tuneCPU{tuneCPU} {}
+
CodeGenSpecifics() = delete;
virtual ~CodeGenSpecifics() {}
@@ -165,7 +180,8 @@ class CodeGenSpecifics {
virtual unsigned char getCIntTypeWidth() const = 0;
llvm::StringRef getTargetCPU() const { return targetCPU; }
-
+ llvm::StringRef getTuneCPU() const { return tuneCPU; }
+
mlir::LLVM::TargetFeaturesAttr getTargetFeatures() const {
return targetFeatures;
}
@@ -182,6 +198,7 @@ class CodeGenSpecifics {
llvm::StringRef targetCPU;
mlir::LLVM::TargetFeaturesAttr targetFeatures;
const mlir::DataLayout *dataLayout = nullptr;
+ llvm::StringRef tuneCPU;
};
} // namespace fir
diff --git a/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h b/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h
index 059a10ce2fe51..b69f1415040ec 100644
--- a/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h
+++ b/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h
@@ -58,6 +58,13 @@ void setTargetCPU(mlir::ModuleOp mod, llvm::StringRef cpu);
/// Get the target CPU string from the Module or return a null reference.
llvm::StringRef getTargetCPU(mlir::ModuleOp mod);
+/// Set the tune CPU for the module by attaching it as a string attribute on
+/// `mod`. An empty `cpu` leaves the module unchanged.
+void setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu);
+
+/// Get the tune CPU string from the Module or return a null reference.
+llvm::StringRef getTuneCPU(mlir::ModuleOp mod);
+
/// Set the target features for the module.
void setTargetFeatures(mlir::ModuleOp mod, llvm::StringRef features);
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index b3ed9acad36df..786083f95e15c 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -411,7 +411,10 @@ def FunctionAttr : Pass<"function-attr", "mlir::func::FuncOp"> {
Option<"unsafeFPMath", "unsafe-fp-math",
"bool", /*default=*/"false",
"Set the unsafe-fp-math attribute on functions in the module.">,
- ];
+ Option<"tuneCPU", "tune-cpu",
+ "llvm::StringRef", /*default=*/"llvm::StringRef{}",
+ "Set the tune-cpu attribute on functions in the module.">,
+];
}
def AssumedRankOpConversion : Pass<"fir-assumed-rank-op", "mlir::ModuleOp"> {
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index e2d60ad46f14f..3d66a946fc946 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -431,6 +431,10 @@ static void parseTargetArgs(TargetOptions &opts, llvm::opt::ArgList &args) {
args.getLastArg(clang::driver::options::OPT_target_cpu))
opts.cpu = a->getValue();
+ if (const llvm::opt::Arg *a =
+ args.getLastArg(clang::driver::options::OPT_tune_cpu))
+ opts.cpuToTuneFor = a->getValue();
+
for (const llvm::opt::Arg *currentArg :
args.filtered(clang::driver::options::OPT_target_feature))
opts.featuresAsWritten.emplace_back(currentArg->getValue());
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index a85ecd1ac71b3..5c86bd947ce73 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -297,7 +297,8 @@ bool CodeGenAction::beginSourceFileAction() {
ci.getParsing().allCooked(), ci.getInvocation().getTargetOpts().triple,
kindMap, ci.getInvocation().getLoweringOpts(),
ci.getInvocation().getFrontendOpts().envDefaults,
- ci.getInvocation().getFrontendOpts().features, targetMachine);
+ ci.getInvocation().getFrontendOpts().features, targetMachine,
+ ci.getInvocation().getTargetOpts().cpuToTuneFor);
// Fetch module from lb, so we can set
mlirModule = std::make_unique<mlir::ModuleOp>(lb.getModule());
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 3d071f6bb8d5a..b998709dccd8c 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -6020,7 +6020,7 @@ Fortran::lower::LoweringBridge::LoweringBridge(
const Fortran::lower::LoweringOptions &loweringOptions,
const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults,
const Fortran::common::LanguageFeatureControl &languageFeatures,
- const llvm::TargetMachine &targetMachine)
+ const llvm::TargetMachine &targetMachine, const llvm::StringRef tuneCPU)
: semanticsContext{semanticsContext}, defaultKinds{defaultKinds},
intrinsics{intrinsics}, targetCharacteristics{targetCharacteristics},
cooked{&cooked}, context{context}, kindMap{kindMap},
@@ -6077,6 +6077,7 @@ Fortran::lower::LoweringBridge::LoweringBridge(
fir::setTargetTriple(*module.get(), triple);
fir::setKindMapping(*module.get(), kindMap);
fir::setTargetCPU(*module.get(), targetMachine.getTargetCPU());
+ fir::setTuneCPU(*module.get(), tuneCPU);
fir::setTargetFeatures(*module.get(), targetMachine.getTargetFeatureString());
fir::support::setMLIRDataLayout(*module.get(),
targetMachine.createDataLayout());
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 7483acfcd1ca7..e370a33b7c4a7 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3618,6 +3618,9 @@ class FIRToLLVMLowering
if (!forcedTargetCPU.empty())
fir::setTargetCPU(mod, forcedTargetCPU);
+ if (!forcedTuneCPU.empty())
+ fir::setTuneCPU(mod, forcedTuneCPU);
+
if (!forcedTargetFeatures.empty())
fir::setTargetFeatures(mod, forcedTargetFeatures);
@@ -3714,7 +3717,8 @@ class FIRToLLVMLowering
signalPassFailure();
}
- // Run pass to add comdats to functions that have weak linkage on relevant platforms
+ // Run pass to add comdats to functions that have weak linkage on relevant
+ // platforms
if (fir::getTargetTriple(mod).supportsCOMDAT()) {
mlir::OpPassManager comdatPM("builtin.module");
comdatPM.addPass(mlir::LLVM::createLLVMAddComdats());
diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp
index 652e2bddc1b89..25141102a8c43 100644
--- a/flang/lib/Optimizer/CodeGen/Target.cpp
+++ b/flang/lib/Optimizer/CodeGen/Target.cpp
@@ -1113,3 +1113,14 @@ fir::CodeGenSpecifics::get(mlir::MLIRContext *ctx, llvm::Triple &&trp,
}
TODO(mlir::UnknownLoc::get(ctx), "target not implemented");
}
+
+std::unique_ptr<fir::CodeGenSpecifics> fir::CodeGenSpecifics::get(
+ mlir::MLIRContext *ctx, llvm::Triple &&trp, KindMapping &&kindMap,
+ llvm::StringRef targetCPU, mlir::LLVM::TargetFeaturesAttr targetFeatures,
+ const mlir::DataLayout &dl, llvm::StringRef tuneCPU) {
+ std::unique_ptr<fir::CodeGenSpecifics> CGS = fir::CodeGenSpecifics::get(
+ ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
+
+ CGS->tuneCPU = tuneCPU;
+ return CGS;
+}
diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
index 561d700f41220..b52f2b9325ece 100644
--- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
@@ -89,6 +89,9 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
if (!forcedTargetCPU.empty())
fir::setTargetCPU(mod, forcedTargetCPU);
+ if (!forcedTuneCPU.empty())
+ fir::setTuneCPU(mod, forcedTuneCPU);
+
if (!forcedTargetFeatures.empty())
fir::setTargetFeatures(mod, forcedTargetFeatures);
@@ -106,7 +109,8 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
auto specifics = fir::CodeGenSpecifics::get(
mod.getContext(), fir::getTargetTriple(mod), fir::getKindMapping(mod),
- fir::getTargetCPU(mod), fir::getTargetFeatures(mod), *dl);
+ fir::getTargetCPU(mod), fir::getTargetFeatures(mod), *dl,
+ fir::getTuneCPU(mod));
setMembers(specifics.get(), &rewriter, &*dl);
@@ -672,12 +676,18 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
auto targetCPU = specifics->getTargetCPU();
mlir::StringAttr targetCPUAttr =
targetCPU.empty() ? nullptr : mlir::StringAttr::get(ctx, targetCPU);
+ auto tuneCPU = specifics->getTuneCPU();
+ mlir::StringAttr tuneCPUAttr =
+ tuneCPU.empty() ? nullptr : mlir::StringAttr::get(ctx, tuneCPU);
auto targetFeaturesAttr = specifics->getTargetFeatures();
for (auto fn : mod.getOps<mlir::func::FuncOp>()) {
if (targetCPUAttr)
fn->setAttr("target_cpu", targetCPUAttr);
+ if (tuneCPUAttr)
+ fn->setAttr("tune_cpu", tuneCPUAttr);
+
if (targetFeaturesAttr)
fn->setAttr("target_features", targetFeaturesAttr);
diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
index ce86c625e082f..a28b03442fe83 100644
--- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
+++ b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
@@ -35,7 +35,8 @@ LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
kindMapping(getKindMapping(module)),
specifics(CodeGenSpecifics::get(
module.getContext(), getTargetTriple(module), getKindMapping(module),
- getTargetCPU(module), getTargetFeatures(module), dl)),
+ getTargetCPU(module), getTargetFeatures(module), dl,
+ getTuneCPU(module))),
tbaaBuilder(std::make_unique<TBAABuilder>(module->getContext(), applyTBAA,
forceUnifiedTBAATree)),
dataLayout{&dl} {
diff --git a/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp b/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp
index c4d00875c45e4..1aa631cb39126 100644
--- a/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp
+++ b/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp
@@ -77,6 +77,24 @@ llvm::StringRef fir::getTargetCPU(mlir::ModuleOp mod) {
return {};
}
+static constexpr const char *tuneCpuName = "fir.tune_cpu";
+
+void fir::setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu) {
+ if (cpu.empty())
+ return;
+
+ auto *ctx = mod.getContext();
+
+ mod->setAttr(tuneCpuName, mlir::StringAttr::get(ctx, cpu));
+}
+
+llvm::StringRef fir::getTuneCPU(mlir::ModuleOp mod) {
+ if (auto attr = mod->getAttrOfType<mlir::StringAttr>(tuneCpuName))
+ return attr.getValue();
+
+ return {};
+}
+
static constexpr const char *targetFeaturesName = "fir.target_features";
void fir::setTargetFeatures(mlir::ModuleOp mod, llvm::StringRef features) {
diff --git a/flang/test/Driver/tune-cpu-fir.f90 b/flang/test/Driver/tune-cpu-fir.f90
new file mode 100644
index 0000000000000..43c13b426d5d9
--- /dev/null
+++ b/flang/test/Driver/tune-cpu-fir.f90
@@ -0,0 +1,25 @@
+! RUN: %if aarch64-registered-target %{ %flang_fc1 -emit-fir -triple aarch64-unknown-linux-gnu -target-cpu aarch64 %s -o - | FileCheck %s --check-prefixes=ALL,ARMCPU %}
+! RUN: %if aarch64-registered-target %{ %flang_fc1 -emit-fir -triple aarch64-unknown-linux-gnu -tune-cpu neoverse-n1 %s -o - | FileCheck %s --check-prefixes=ALL,ARMTUNE %}
+! RUN: %if aarch64-registered-target %{ %flang_fc1 -emit-fir -triple aarch64-unknown-linux-gnu -target-cpu aarch64 -tune-cpu neoverse-n1 %s -o - | FileCheck %s --check-prefixes=ALL,ARMBOTH %}
+
+! RUN: %if x86-registered-target %{ %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -target-cpu x86-64 %s -o - | FileCheck %s --check-prefixes=ALL,X86CPU %}
+! RUN: %if x86-registered-target %{ %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -tune-cpu pentium4 %s -o - | FileCheck %s --check-prefixes=ALL,X86TUNE %}
+! RUN: %if x86-registered-target %{ %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -tune-cpu pentium4 %s -o - | FileCheck %s --check-prefixes=ALL,X86BOTH %}
+
+! ALL: module attributes {
+
+! ARMCPU-SAME: fir.target_cpu = "aarch64"
+! ARMCPU-NOT: fir.tune_cpu = "neoverse-n1"
+
+! ARMTUNE-SAME: fir.tune_cpu = "neoverse-n1"
+
+! ARMBOTH-SAME: fir.target_cpu = "aarch64"
+! ARMBOTH-SAME: fir.tune_cpu = "neoverse-n1"
+
+! X86CPU-SAME: fir.target_cpu = "x86-64"
+! X86CPU-NOT: fir.tune_cpu = "pentium4"
+
+! X86TUNE-SAME: fir.tune_cpu = "pentium4"
+
+! X86BOTH-SAME: fir.target_cpu = "x86-64"
+! X86BOTH-SAME: fir.tune_cpu = "pentium4"
diff --git a/flang/test/Lower/tune-cpu-llvm.f90 b/flang/test/Lower/tune-cpu-llvm.f90
new file mode 100644
index 0000000000000..dc2a68730cf23
--- /dev/null
+++ b/flang/test/Lower/tune-cpu-llvm.f90
@@ -0,0 +1,8 @@
+! RUN: %if x86-registered-target %{ %flang -mtune=pentium4 -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,CHECK-X86 %}
+! RUN: %if aarch64-registered-target %{ %flang -mtune=neoverse-n1 -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,CHECK-ARM %}
+
+!ALL: attributes #{{[0-9]+}} = {
+!CHECK-X86-SAME: "tune-cpu"="pentium4"
+!CHECK-ARM-SAME: "tune-cpu"="neoverse-n1"
+subroutine a
+end subroutine a
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index e5e41ad3e9cf2..07eef065daf6f 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -367,11 +367,12 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR);
loweringOptions.setNSWOnLoopVarInc(setNSW);
std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {};
+ constexpr const char *tuneCPU = "";
auto burnside = Fortran::lower::LoweringBridge::create(
ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),
semanticsContext.targetCharacteristics(), parsing.allCooked(),
targetTriple, kindMap, loweringOptions, envDefaults,
- semanticsContext.languageFeatures(), targetMachine);
+ semanticsContext.languageFeatures(), targetMachine, tuneCPU);
mlir::ModuleOp mlirModule = burnside.getModule();
if (enableOpenMP) {
if (enableOpenMPGPU && !enableOpenMPDevice) {
diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp
index 34ac0e1a5cb98..afaad39ce1268 100644
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@@ -58,6 +58,9 @@ static cl::opt<std::string> targetTriple("target",
static cl::opt<std::string>
targetCPU("target-cpu", cl::desc("specify a target CPU"), cl::init(""));
+static cl::opt<std::string>
+ tuneCPU("tune-cpu", cl::desc("specify a tune CPU"), cl::init(""));
+
static cl::opt<std::string>
targetFeatures("target-features", cl::desc("specify the target features"),
cl::init(""));
@@ -113,6 +116,7 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
fir::setTargetTriple(*owningRef, targetTriple);
fir::setKindMapping(*owningRef, kindMap);
fir::setTargetCPU(*owningRef, targetCPU);
+ fir::setTuneCPU(*owningRef, tuneCPU);
fir::setTargetFeatures(*owningRef, targetFeatures);
// tco is a testing tool, so it will happily use the target independent
// data layout if none is on the module.
diff --git a/flang/unittests/Optimizer/FIRContextTest.cpp b/flang/unittests/Optimizer/FIRContextTest.cpp
index 49e1ebf23d8aa..dbc00a3b1339d 100644
--- a/flang/unittests/Optimizer/FIRContextTest.cpp
+++ b/flang/unittests/Optimizer/FIRContextTest.cpp
@@ -34,6 +34,7 @@ struct StringAttributesTests : public testing::Test {
"i10:80,l3:24,a1:8,r54:Double,r62:X86_FP80,r11:PPC_FP128";
std::string target = "powerpc64le-unknown-linux-gnu";
std::string targetCPU = "gfx90a";
+ std::string tuneCPU = "generic";
std::string targetFeatures = "+gfx9-insts,+wavefrontsize64";
mlir::ModuleOp mod;
};
@@ -42,6 +43,7 @@ TEST_F(StringAttributesTests, moduleStringAttrTest) {
setTargetTriple(mod, target);
setKindMapping(mod, *kindMap);
setTargetCPU(mod, targetCPU);
+ setTuneCPU(mod, tuneCPU);
setTargetFeatures(mod, targetFeatures);
auto triple = getTargetTriple(mod);
@@ -61,7 +63,8 @@ TEST_F(StringAttributesTests, moduleStringAttrTest) {
EXPECT_TRUE(mapStr.find("r62:X86_FP80") != std::string::npos);
EXPECT_EQ(getTargetCPU(mod), targetCPU);
-
+ EXPECT_EQ(getTuneCPU(mod), tuneCPU);
+
auto features = getTargetFeatures(mod);
auto featuresList = features.getFeatures();
EXPECT_EQ(features.getFeaturesString(), targetFeatures);
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 65dfcf93d7029..f0dec69a5032a 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1449,6 +1449,7 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [
OptionalAttr<LLVM_VScaleRangeAttr>:$vscale_range,
OptionalAttr<FramePointerKindAttr>:$frame_pointer,
OptionalAttr<StrAttr>:$target_cpu,
+ OptionalAttr<StrAttr>:$tune_cpu,
OptionalAttr<LLVM_TargetFeaturesAttr>:$target_features,
OptionalAttr<BoolAttr>:$unsafe_fp_math,
OptionalAttr<BoolAttr>:$no_infs_fp_math,
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index 9915576bbc458..5bc3dd680d02d 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -1688,6 +1688,7 @@ static constexpr std::array kExplicitAttributes{
StringLiteral("noinline"),
StringLiteral("optnone"),
StringLiteral("target-features"),
+ StringLiteral("tune-cpu"),
StringLiteral("unsafe-fp-math"),
StringLiteral("vscale_range"),
};
@@ -1804,6 +1805,10 @@ void ModuleImport::processFunctionAttributes(llvm::Function *func,
attr.isStringAttribute())
funcOp.setTargetCpuAttr(StringAttr::get(context, attr.getValueAsString()));
+ if (llvm::Attribute attr = func->getFnAttribute("tune-cpu");
+ attr.isStringAttribute())
+ funcOp.setTuneCpuAttr(StringAttr::get(context, attr.getValueAsString()));
+
if (llvm::Attribute attr = func->getFnAttribute("target-features");
attr.isStringAttribute())
funcOp.setTargetFeaturesAttr(
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 1d2e4725d5d63..2735b13a1499f 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1325,6 +1325,9 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) {
if (auto targetCpu = func.getTargetCpu())
llvmFunc->addFnAttr("target-cpu", *targetCpu);
+ if (auto tuneCpu = func.getTuneCpu())
+ llvmFunc->addFnAttr("tune-cpu", *tuneCpu);
+
if (auto targetFeatures = func.getTargetFeatures())
llvmFunc->addFnAttr("target-features", targetFeatures->getFeaturesString());
diff --git a/mlir/test/Target/LLVMIR/Import/tune-cpu.ll b/mlir/test/Target/LLVMIR/Import/tune-cpu.ll
new file mode 100644
index 0000000000000..991a70ada473c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/Import/tune-cpu.ll
@@ -0,0 +1,16 @@
+; RUN: mlir-translate -import-llvm -split-input-file %s | FileCheck %s
+
+; CHECK-LABEL: llvm.func @tune_cpu_x86()
+; CHECK-SAME: tune_cpu = "pentium4"
+define void @tune_cpu_x86() #0 {
+ ret void
+}
+
+; CHECK-LABEL: llvm.func @tune_cpu_arm()
+; CHECK-SAME: tune_cpu = "neoverse-n1"
+define void @tune_cpu_arm() #1 {
+ ret void
+}
+
+attributes #0 = { "tune-cpu"="pentium4" }
+attributes #1 = { "tune-cpu"="neoverse-n1" }
diff --git a/mlir/test/Target/LLVMIR/tune-cpu.mlir b/mlir/test/Target/LLVMIR/tune-cpu.mlir
new file mode 100644
index 0000000000000..c7969f5eb4db0
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/tune-cpu.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK: define void @tune_cpu_x86() #[[ATTRSX86:.*]] {
+// CHECK: define void @tune_cpu_arm() #[[ATTRSARM:.*]] {
+// CHECK: attributes #[[ATTRSX86]] = { "tune-cpu"="pentium4" }
+// CHECK: attributes #[[ATTRSARM]] = { "tune-cpu"="neoverse-n1" }
+
+llvm.func @tune_cpu_x86() attributes {tune_cpu = "pentium4"} {
+ llvm.return
+}
+
+llvm.func @tune_cpu_arm() attributes {tune_cpu = "neoverse-n1"} {
+ llvm.return
+}
>From 0a9bf0d5e7a8b8ed0252f0c78c9bd010016f59d5 Mon Sep 17 00:00:00 2001
From: Alexis Perry-Holby <aperry at lanl.gov>
Date: Thu, 11 Jul 2024 13:15:13 -0600
Subject: [PATCH 2/2] clang-format
---
flang/include/flang/Optimizer/CodeGen/Target.h | 2 +-
flang/include/flang/Optimizer/Dialect/Support/FIRContext.h | 2 +-
flang/lib/Optimizer/CodeGen/TargetRewrite.cpp | 2 +-
flang/tools/tco/tco.cpp | 4 ++--
flang/unittests/Optimizer/FIRContextTest.cpp | 2 +-
mlir/lib/Target/LLVMIR/ModuleImport.cpp | 4 ++--
6 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/flang/include/flang/Optimizer/CodeGen/Target.h b/flang/include/flang/Optimizer/CodeGen/Target.h
index 2b3b2152ac80c..a7161152a5c32 100644
--- a/flang/include/flang/Optimizer/CodeGen/Target.h
+++ b/flang/include/flang/Optimizer/CodeGen/Target.h
@@ -181,7 +181,7 @@ class CodeGenSpecifics {
llvm::StringRef getTargetCPU() const { return targetCPU; }
llvm::StringRef getTuneCPU() const { return tuneCPU; }
-
+
mlir::LLVM::TargetFeaturesAttr getTargetFeatures() const {
return targetFeatures;
}
diff --git a/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h b/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h
index b69f1415040ec..bd31aa0782493 100644
--- a/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h
+++ b/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h
@@ -64,7 +64,7 @@ void setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu);
/// Get the tune CPU string from the Module or return a null reference.
llvm::StringRef getTuneCPU(mlir::ModuleOp mod);
-
+
/// Set the target features for the module.
void setTargetFeatures(mlir::ModuleOp mod, llvm::StringRef features);
diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
index b52f2b9325ece..85bf90e475063 100644
--- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
@@ -110,7 +110,7 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
auto specifics = fir::CodeGenSpecifics::get(
mod.getContext(), fir::getTargetTriple(mod), fir::getKindMapping(mod),
fir::getTargetCPU(mod), fir::getTargetFeatures(mod), *dl,
- fir::getTuneCPU(mod));
+ fir::getTuneCPU(mod));
setMembers(specifics.get(), &rewriter, &*dl);
diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp
index afaad39ce1268..a8c64333109ae 100644
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@@ -58,8 +58,8 @@ static cl::opt<std::string> targetTriple("target",
static cl::opt<std::string>
targetCPU("target-cpu", cl::desc("specify a target CPU"), cl::init(""));
-static cl::opt<std::string>
- tuneCPU("tune-cpu", cl::desc("specify a tune CPU"), cl::init(""));
+static cl::opt<std::string> tuneCPU("tune-cpu", cl::desc("specify a tune CPU"),
+ cl::init(""));
static cl::opt<std::string>
targetFeatures("target-features", cl::desc("specify the target features"),
diff --git a/flang/unittests/Optimizer/FIRContextTest.cpp b/flang/unittests/Optimizer/FIRContextTest.cpp
index dbc00a3b1339d..3f8b59ac94a95 100644
--- a/flang/unittests/Optimizer/FIRContextTest.cpp
+++ b/flang/unittests/Optimizer/FIRContextTest.cpp
@@ -64,7 +64,7 @@ TEST_F(StringAttributesTests, moduleStringAttrTest) {
EXPECT_EQ(getTargetCPU(mod), targetCPU);
EXPECT_EQ(getTuneCPU(mod), tuneCPU);
-
+
auto features = getTargetFeatures(mod);
auto featuresList = features.getFeatures();
EXPECT_EQ(features.getFeaturesString(), targetFeatures);
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index 5bc3dd680d02d..c4b5fea8ffe8b 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -133,8 +133,8 @@ static LogicalResult convertInstructionImpl(OpBuilder &odsBuilder,
if (iface.isConvertibleInstruction(inst->getOpcode()))
return iface.convertInstruction(odsBuilder, inst, llvmOperands,
moduleImport);
- // TODO: Implement the `convertInstruction` hooks in the
- // `LLVMDialectLLVMIRImportInterface` and move the following include there.
+ // TODO: Implement the `convertInstruction` hooks in the
+ // `LLVMDialectLLVMIRImportInterface` and move the following include there.
#include "mlir/Dialect/LLVMIR/LLVMOpFromLLVMIRConversions.inc"
return failure();
}
More information about the flang-commits
mailing list