[clang] [flang] [mlir] [flang] Add basic -mtune support (PR #95043)

Alexis Perry-Holby via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 12 12:16:24 PDT 2024


https://github.com/AlexisPerry updated https://github.com/llvm/llvm-project/pull/95043

>From 2312d31b14aecc6eeea2e81d221ee004e5de3efc Mon Sep 17 00:00:00 2001
From: Alexis Perry-Holby <aperry at lanl.gov>
Date: Thu, 6 Jun 2024 14:02:52 -0600
Subject: [PATCH 1/8] [flang] Add basic -mtune support

---
 clang/include/clang/Driver/Options.td         |  7 ++++---
 clang/lib/Driver/ToolChains/Flang.cpp         |  8 ++++++++
 flang/include/flang/Frontend/TargetOptions.h  |  3 +++
 flang/include/flang/Lower/Bridge.h            |  6 +++---
 .../flang/Optimizer/CodeGen/CGPasses.td       |  4 ++++
 .../include/flang/Optimizer/CodeGen/Target.h  | 19 ++++++++++++++++++-
 .../Optimizer/Dialect/Support/FIRContext.h    |  7 +++++++
 .../flang/Optimizer/Transforms/Passes.td      |  3 +++
 flang/lib/Frontend/CompilerInvocation.cpp     |  4 ++++
 flang/lib/Frontend/FrontendActions.cpp        |  3 ++-
 flang/lib/Lower/Bridge.cpp                    |  3 ++-
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       |  6 +++++-
 flang/lib/Optimizer/CodeGen/Target.cpp        | 11 +++++++++++
 flang/lib/Optimizer/CodeGen/TargetRewrite.cpp | 12 +++++++++++-
 flang/lib/Optimizer/CodeGen/TypeConverter.cpp |  3 ++-
 .../Optimizer/Dialect/Support/FIRContext.cpp  | 18 ++++++++++++++++++
 flang/tools/bbc/bbc.cpp                       |  2 +-
 flang/tools/tco/tco.cpp                       |  4 ++++
 flang/unittests/Optimizer/FIRContextTest.cpp  |  3 +++
 .../mlir/Dialect/LLVMIR/LLVMAttrDefs.td       |  9 +++++++++
 mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td   |  1 +
 mlir/lib/Target/LLVMIR/ModuleImport.cpp       |  5 +++++
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp  |  3 +++
 23 files changed, 131 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index d44faa55c456f..b81f480e1ed2b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5403,6 +5403,7 @@ def module_file_info : Flag<["-"], "module-file-info">, Flags<[]>,
   HelpText<"Provide information about a particular module file">;
 def mthumb : Flag<["-"], "mthumb">, Group<m_Group>;
 def mtune_EQ : Joined<["-"], "mtune=">, Group<m_Group>,
+  Visibility<[ClangOption, FlangOption]>,
   HelpText<"Only supported on AArch64, PowerPC, RISC-V, SPARC, SystemZ, and X86">;
 def multi__module : Flag<["-"], "multi_module">;
 def multiply__defined__unused : Separate<["-"], "multiply_defined_unused">;
@@ -6722,9 +6723,6 @@ def emit_hlfir : Flag<["-"], "emit-hlfir">, Group<Action_Group>,
 
 let Visibility = [CC1Option, CC1AsOption] in {
 
-def tune_cpu : Separate<["-"], "tune-cpu">,
-  HelpText<"Tune for a specific cpu type">,
-  MarshallingInfoString<TargetOpts<"TuneCPU">>;
 def target_abi : Separate<["-"], "target-abi">,
   HelpText<"Target a particular ABI type">,
   MarshallingInfoString<TargetOpts<"ABI">>;
@@ -6751,6 +6749,9 @@ def darwin_target_variant_triple : Separate<["-"], "darwin-target-variant-triple
 
 let Visibility = [CC1Option, CC1AsOption, FC1Option] in {
 
+def tune_cpu : Separate<["-"], "tune-cpu">,
+  HelpText<"Tune for a specific cpu type">,
+  MarshallingInfoString<TargetOpts<"TuneCPU">>;
 def target_cpu : Separate<["-"], "target-cpu">,
   HelpText<"Target a specific cpu type">,
   MarshallingInfoString<TargetOpts<"CPU">>;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 42b45dba2bd31..3dc7ee0ea2bff 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Support/Path.h"
 #include "llvm/TargetParser/RISCVISAInfo.h"
 #include "llvm/TargetParser/RISCVTargetParser.h"
+#include "llvm/TargetParser/Host.h"
 
 #include <cassert>
 
@@ -411,6 +412,13 @@ void Flang::addTargetOptions(const ArgList &Args,
   }
 
   // TODO: Add target specific flags, ABI, mtune option etc.
+  // Forward -mtune=<cpu> to fc1 as -tune-cpu; "native" means the host CPU.
+  if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) {
+    const char *TuneCPU = A->getValue();
+    if (llvm::StringRef(TuneCPU) == "native")
+      TuneCPU = Args.MakeArgString(llvm::sys::getHostCPUName());
+    CmdArgs.append({"-tune-cpu", TuneCPU});
+  }
 }
 
 void Flang::addOffloadOptions(Compilation &C, const InputInfoList &Inputs,
diff --git a/flang/include/flang/Frontend/TargetOptions.h b/flang/include/flang/Frontend/TargetOptions.h
index ef5d270a2185d..a7a7192c55cb1 100644
--- a/flang/include/flang/Frontend/TargetOptions.h
+++ b/flang/include/flang/Frontend/TargetOptions.h
@@ -32,6 +32,9 @@ class TargetOptions {
   /// If given, the name of the target CPU to generate code for.
   std::string cpu;
 
+  /// If given, the name of the target CPU to tune code for.
+  std::string tuneCPU;
+
   /// The list of target specific features to enable or disable, as written on
   /// the command line.
   std::vector<std::string> featuresAsWritten;
diff --git a/flang/include/flang/Lower/Bridge.h b/flang/include/flang/Lower/Bridge.h
index 52110b861b680..4379ed512cdf0 100644
--- a/flang/include/flang/Lower/Bridge.h
+++ b/flang/include/flang/Lower/Bridge.h
@@ -65,11 +65,11 @@ class LoweringBridge {
          const Fortran::lower::LoweringOptions &loweringOptions,
          const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults,
          const Fortran::common::LanguageFeatureControl &languageFeatures,
-         const llvm::TargetMachine &targetMachine) {
+         const llvm::TargetMachine &targetMachine, llvm::StringRef tuneCPU) {
     return LoweringBridge(ctx, semanticsContext, defaultKinds, intrinsics,
                           targetCharacteristics, allCooked, triple, kindMap,
                           loweringOptions, envDefaults, languageFeatures,
-                          targetMachine);
+                          targetMachine, tuneCPU);
   }
 
   //===--------------------------------------------------------------------===//
@@ -148,7 +148,7 @@ class LoweringBridge {
       const Fortran::lower::LoweringOptions &loweringOptions,
       const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults,
       const Fortran::common::LanguageFeatureControl &languageFeatures,
-      const llvm::TargetMachine &targetMachine);
+      const llvm::TargetMachine &targetMachine, const llvm::StringRef tuneCPU);
   LoweringBridge() = delete;
   LoweringBridge(const LoweringBridge &) = delete;
 
diff --git a/flang/include/flang/Optimizer/CodeGen/CGPasses.td b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
index 9a4d327b33bad..989e3943882a1 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGPasses.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
@@ -31,6 +31,8 @@ def FIRToLLVMLowering : Pass<"fir-to-llvm-ir", "mlir::ModuleOp"> {
            "Override module's data layout.">,
     Option<"forcedTargetCPU", "target-cpu", "std::string", /*default=*/"",
            "Override module's target CPU.">,
+    Option<"forcedTuneCPU", "tune-cpu", "std::string", /*default=*/"",
+           "Override module's tune CPU.">,
     Option<"forcedTargetFeatures", "target-features", "std::string",
            /*default=*/"", "Override module's target features.">,
     Option<"applyTBAA", "apply-tbaa", "bool", /*default=*/"false",
@@ -68,6 +70,8 @@ def TargetRewritePass : Pass<"target-rewrite", "mlir::ModuleOp"> {
            "Override module's target triple.">,
     Option<"forcedTargetCPU", "target-cpu", "std::string", /*default=*/"",
            "Override module's target CPU.">,
+    Option<"forcedTuneCPU", "tune-cpu", "std::string", /*default=*/"",
+           "Override module's tune CPU.">,
     Option<"forcedTargetFeatures", "target-features", "std::string",
            /*default=*/"", "Override module's target features.">,
     Option<"noCharacterConversion", "no-character-conversion",
diff --git a/flang/include/flang/Optimizer/CodeGen/Target.h b/flang/include/flang/Optimizer/CodeGen/Target.h
index 3cf6a74a9adb7..a7161152a5c32 100644
--- a/flang/include/flang/Optimizer/CodeGen/Target.h
+++ b/flang/include/flang/Optimizer/CodeGen/Target.h
@@ -76,6 +76,11 @@ class CodeGenSpecifics {
       llvm::StringRef targetCPU, mlir::LLVM::TargetFeaturesAttr targetFeatures,
       const mlir::DataLayout &dl);
 
+  static std::unique_ptr<CodeGenSpecifics>
+  get(mlir::MLIRContext *ctx, llvm::Triple &&trp, KindMapping &&kindMap,
+      llvm::StringRef targetCPU, mlir::LLVM::TargetFeaturesAttr targetFeatures,
+      const mlir::DataLayout &dl, llvm::StringRef tuneCPU);
+
   static TypeAndAttr getTypeAndAttr(mlir::Type t) { return TypeAndAttr{t, {}}; }
 
   CodeGenSpecifics(mlir::MLIRContext *ctx, llvm::Triple &&trp,
@@ -83,7 +88,17 @@ class CodeGenSpecifics {
                    mlir::LLVM::TargetFeaturesAttr targetFeatures,
                    const mlir::DataLayout &dl)
       : context{*ctx}, triple{std::move(trp)}, kindMap{std::move(kindMap)},
-        targetCPU{targetCPU}, targetFeatures{targetFeatures}, dataLayout{&dl} {}
+        targetCPU{targetCPU}, targetFeatures{targetFeatures}, dataLayout{&dl},
+        tuneCPU{""} {}
+
+  CodeGenSpecifics(mlir::MLIRContext *ctx, llvm::Triple &&trp,
+                   KindMapping &&kindMap, llvm::StringRef targetCPU,
+                   mlir::LLVM::TargetFeaturesAttr targetFeatures,
+                   const mlir::DataLayout &dl, llvm::StringRef tuneCPU)
+      : context{*ctx}, triple{std::move(trp)}, kindMap{std::move(kindMap)},
+        targetCPU{targetCPU}, targetFeatures{targetFeatures}, dataLayout{&dl},
+        tuneCPU{tuneCPU} {}
+
   CodeGenSpecifics() = delete;
   virtual ~CodeGenSpecifics() {}
 
@@ -165,6 +180,7 @@ class CodeGenSpecifics {
   virtual unsigned char getCIntTypeWidth() const = 0;
 
   llvm::StringRef getTargetCPU() const { return targetCPU; }
+  llvm::StringRef getTuneCPU() const { return tuneCPU; }
 
   mlir::LLVM::TargetFeaturesAttr getTargetFeatures() const {
     return targetFeatures;
@@ -182,6 +198,7 @@ class CodeGenSpecifics {
   llvm::StringRef targetCPU;
   mlir::LLVM::TargetFeaturesAttr targetFeatures;
   const mlir::DataLayout *dataLayout = nullptr;
+  llvm::StringRef tuneCPU;
 };
 
 } // namespace fir
diff --git a/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h b/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h
index 059a10ce2fe51..bd31aa0782493 100644
--- a/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h
+++ b/flang/include/flang/Optimizer/Dialect/Support/FIRContext.h
@@ -58,6 +58,13 @@ void setTargetCPU(mlir::ModuleOp mod, llvm::StringRef cpu);
 /// Get the target CPU string from the Module or return a null reference.
 llvm::StringRef getTargetCPU(mlir::ModuleOp mod);
 
+/// Set the tune CPU for the module. `cpu` must not be deallocated while
+/// module `mod` is still live.
+void setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu);
+
+/// Get the tune CPU string from the Module, or an empty string if none is set.
+llvm::StringRef getTuneCPU(mlir::ModuleOp mod);
+
 /// Set the target features for the module.
 void setTargetFeatures(mlir::ModuleOp mod, llvm::StringRef features);
 
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 7a3baca4c19da..2b1752960f485 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -393,6 +393,9 @@ def FunctionAttr : Pass<"function-attr", "mlir::func::FuncOp"> {
     Option<"unsafeFPMath", "unsafe-fp-math",
            "bool", /*default=*/"false",
            "Set the unsafe-fp-math attribute on functions in the module.">,
+    Option<"tuneCPU", "tune-cpu",
+           "llvm::StringRef", /*default=*/"llvm::StringRef{}",
+           "Set the tune-cpu attribute on functions in the module.">,
   ];
 }
 
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index f64a939b785ef..13fda2ec6e035 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -402,6 +402,10 @@ static void parseTargetArgs(TargetOptions &opts, llvm::opt::ArgList &args) {
           args.getLastArg(clang::driver::options::OPT_target_cpu))
     opts.cpu = a->getValue();
 
+  if (const llvm::opt::Arg *a =
+          args.getLastArg(clang::driver::options::OPT_tune_cpu))
+    opts.tuneCPU = a->getValue();
+
   for (const llvm::opt::Arg *currentArg :
        args.filtered(clang::driver::options::OPT_target_feature))
     opts.featuresAsWritten.emplace_back(currentArg->getValue());
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index b1b6391f1439c..a01151dd6346b 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -292,7 +292,8 @@ bool CodeGenAction::beginSourceFileAction() {
       ci.getParsing().allCooked(), ci.getInvocation().getTargetOpts().triple,
       kindMap, ci.getInvocation().getLoweringOpts(),
       ci.getInvocation().getFrontendOpts().envDefaults,
-      ci.getInvocation().getFrontendOpts().features, targetMachine);
+      ci.getInvocation().getFrontendOpts().features, targetMachine,
+      ci.getInvocation().getTargetOpts().tuneCPU);
 
   // Fetch module from lb, so we can set
   mlirModule = std::make_unique<mlir::ModuleOp>(lb.getModule());
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 202efa57d4a36..7df49e3becf17 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -5832,7 +5832,7 @@ Fortran::lower::LoweringBridge::LoweringBridge(
     const Fortran::lower::LoweringOptions &loweringOptions,
     const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults,
     const Fortran::common::LanguageFeatureControl &languageFeatures,
-    const llvm::TargetMachine &targetMachine)
+    const llvm::TargetMachine &targetMachine, const llvm::StringRef tuneCPU)
     : semanticsContext{semanticsContext}, defaultKinds{defaultKinds},
       intrinsics{intrinsics}, targetCharacteristics{targetCharacteristics},
       cooked{&cooked}, context{context}, kindMap{kindMap},
@@ -5889,6 +5889,7 @@ Fortran::lower::LoweringBridge::LoweringBridge(
   fir::setTargetTriple(*module.get(), triple);
   fir::setKindMapping(*module.get(), kindMap);
   fir::setTargetCPU(*module.get(), targetMachine.getTargetCPU());
+  fir::setTuneCPU(*module.get(), tuneCPU);
   fir::setTargetFeatures(*module.get(), targetMachine.getTargetFeatureString());
   fir::support::setMLIRDataLayout(*module.get(),
                                   targetMachine.createDataLayout());
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 9f21c6b0cf097..6e25bcdb0a88e 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3597,6 +3597,9 @@ class FIRToLLVMLowering
     if (!forcedTargetCPU.empty())
       fir::setTargetCPU(mod, forcedTargetCPU);
 
+    if (!forcedTuneCPU.empty())
+      fir::setTuneCPU(mod, forcedTuneCPU);
+
     if (!forcedTargetFeatures.empty())
       fir::setTargetFeatures(mod, forcedTargetFeatures);
 
@@ -3693,7 +3696,8 @@ class FIRToLLVMLowering
       signalPassFailure();
     }
 
-    // Run pass to add comdats to functions that have weak linkage on relevant platforms
+    // Run pass to add comdats to functions that have weak linkage on relevant
+    // platforms
     if (fir::getTargetTriple(mod).supportsCOMDAT()) {
       mlir::OpPassManager comdatPM("builtin.module");
       comdatPM.addPass(mlir::LLVM::createLLVMAddComdats());
diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp
index 652e2bddc1b89..25141102a8c43 100644
--- a/flang/lib/Optimizer/CodeGen/Target.cpp
+++ b/flang/lib/Optimizer/CodeGen/Target.cpp
@@ -1113,3 +1113,14 @@ fir::CodeGenSpecifics::get(mlir::MLIRContext *ctx, llvm::Triple &&trp,
   }
   TODO(mlir::UnknownLoc::get(ctx), "target not implemented");
 }
+
+// Same as the six-argument get(), but also records the CPU to tune for.
+std::unique_ptr<fir::CodeGenSpecifics> fir::CodeGenSpecifics::get(
+    mlir::MLIRContext *ctx, llvm::Triple &&trp, KindMapping &&kindMap,
+    llvm::StringRef targetCPU, mlir::LLVM::TargetFeaturesAttr targetFeatures,
+    const mlir::DataLayout &dl, llvm::StringRef tuneCPU) {
+  auto specifics = fir::CodeGenSpecifics::get(
+      ctx, std::move(trp), std::move(kindMap), targetCPU, targetFeatures, dl);
+  specifics->tuneCPU = tuneCPU;
+  return specifics;
+}
diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
index 8199c5ef7fa26..a101295ba4c13 100644
--- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
@@ -89,6 +89,9 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
     if (!forcedTargetCPU.empty())
       fir::setTargetCPU(mod, forcedTargetCPU);
 
+    if (!forcedTuneCPU.empty())
+      fir::setTuneCPU(mod, forcedTuneCPU);
+
     if (!forcedTargetFeatures.empty())
       fir::setTargetFeatures(mod, forcedTargetFeatures);
 
@@ -106,7 +109,8 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
 
     auto specifics = fir::CodeGenSpecifics::get(
         mod.getContext(), fir::getTargetTriple(mod), fir::getKindMapping(mod),
-        fir::getTargetCPU(mod), fir::getTargetFeatures(mod), *dl);
+        fir::getTargetCPU(mod), fir::getTargetFeatures(mod), *dl,
+        fir::getTuneCPU(mod));
 
     setMembers(specifics.get(), &rewriter, &*dl);
 
@@ -672,12 +676,18 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
     auto targetCPU = specifics->getTargetCPU();
     mlir::StringAttr targetCPUAttr =
         targetCPU.empty() ? nullptr : mlir::StringAttr::get(ctx, targetCPU);
+    auto tuneCPU = specifics->getTuneCPU();
+    mlir::StringAttr tuneCPUAttr =
+        tuneCPU.empty() ? nullptr : mlir::StringAttr::get(ctx, tuneCPU);
     auto targetFeaturesAttr = specifics->getTargetFeatures();
 
     for (auto fn : mod.getOps<mlir::func::FuncOp>()) {
       if (targetCPUAttr)
         fn->setAttr("target_cpu", targetCPUAttr);
 
+      if (tuneCPUAttr)
+        fn->setAttr("tune_cpu", tuneCPUAttr);
+
       if (targetFeaturesAttr)
         fn->setAttr("target_features", targetFeaturesAttr);
 
diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
index 07d3bd713ce45..2b8f8299cb9e5 100644
--- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
+++ b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
@@ -35,7 +35,8 @@ LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
       kindMapping(getKindMapping(module)),
       specifics(CodeGenSpecifics::get(
           module.getContext(), getTargetTriple(module), getKindMapping(module),
-          getTargetCPU(module), getTargetFeatures(module), dl)),
+          getTargetCPU(module), getTargetFeatures(module), dl,
+          getTuneCPU(module))),
       tbaaBuilder(std::make_unique<TBAABuilder>(module->getContext(), applyTBAA,
                                                 forceUnifiedTBAATree)),
       dataLayout{&dl} {
diff --git a/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp b/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp
index c4d00875c45e4..1aa631cb39126 100644
--- a/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp
+++ b/flang/lib/Optimizer/Dialect/Support/FIRContext.cpp
@@ -77,6 +77,24 @@ llvm::StringRef fir::getTargetCPU(mlir::ModuleOp mod) {
   return {};
 }
 
+static constexpr const char *tuneCpuName = "fir.tune_cpu";
+
+// Record `cpu` on `mod` as the fir.tune_cpu string attribute. An empty name
+// is ignored, leaving any attribute already on the module untouched.
+void fir::setTuneCPU(mlir::ModuleOp mod, llvm::StringRef cpu) {
+  if (!cpu.empty()) {
+    mlir::MLIRContext *context = mod.getContext();
+    mod->setAttr(tuneCpuName, mlir::StringAttr::get(context, cpu));
+  }
+}
+
+// Return the value of the module's fir.tune_cpu string attribute, or an
+// empty StringRef when the module carries no such string attribute.
+llvm::StringRef fir::getTuneCPU(mlir::ModuleOp mod) {
+  const auto tuneAttr = mod->getAttrOfType<mlir::StringAttr>(tuneCpuName);
+  return tuneAttr ? tuneAttr.getValue() : llvm::StringRef{};
+}
+
 static constexpr const char *targetFeaturesName = "fir.target_features";
 
 void fir::setTargetFeatures(mlir::ModuleOp mod, llvm::StringRef features) {
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 3485c1499d3b6..44bddde35c103 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -371,7 +371,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
       ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),
       semanticsContext.targetCharacteristics(), parsing.allCooked(),
       targetTriple, kindMap, loweringOptions, envDefaults,
-      semanticsContext.languageFeatures(), targetMachine);
+      semanticsContext.languageFeatures(), targetMachine, ""); // FIXME
   mlir::ModuleOp mlirModule = burnside.getModule();
   if (enableOpenMP) {
     if (enableOpenMPGPU && !enableOpenMPDevice) {
diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp
index 399ea1362fda4..c8964d46b9cea 100644
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@@ -58,6 +58,9 @@ static cl::opt<std::string> targetTriple("target",
 static cl::opt<std::string>
     targetCPU("target-cpu", cl::desc("specify a target CPU"), cl::init(""));
 
+static cl::opt<std::string> tuneCPU("tune-cpu", cl::desc("specify a tune CPU"),
+                                    cl::init(""));
+
 static cl::opt<std::string>
     targetFeatures("target-features", cl::desc("specify the target features"),
                    cl::init(""));
@@ -113,6 +116,7 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
   fir::setTargetTriple(*owningRef, targetTriple);
   fir::setKindMapping(*owningRef, kindMap);
   fir::setTargetCPU(*owningRef, targetCPU);
+  fir::setTuneCPU(*owningRef, tuneCPU);
   fir::setTargetFeatures(*owningRef, targetFeatures);
   // tco is a testing tool, so it will happily use the target independent
   // data layout if none is on the module.
diff --git a/flang/unittests/Optimizer/FIRContextTest.cpp b/flang/unittests/Optimizer/FIRContextTest.cpp
index 49e1ebf23d8aa..3f8b59ac94a95 100644
--- a/flang/unittests/Optimizer/FIRContextTest.cpp
+++ b/flang/unittests/Optimizer/FIRContextTest.cpp
@@ -34,6 +34,7 @@ struct StringAttributesTests : public testing::Test {
       "i10:80,l3:24,a1:8,r54:Double,r62:X86_FP80,r11:PPC_FP128";
   std::string target = "powerpc64le-unknown-linux-gnu";
   std::string targetCPU = "gfx90a";
+  std::string tuneCPU = "generic";
   std::string targetFeatures = "+gfx9-insts,+wavefrontsize64";
   mlir::ModuleOp mod;
 };
@@ -42,6 +43,7 @@ TEST_F(StringAttributesTests, moduleStringAttrTest) {
   setTargetTriple(mod, target);
   setKindMapping(mod, *kindMap);
   setTargetCPU(mod, targetCPU);
+  setTuneCPU(mod, tuneCPU);
   setTargetFeatures(mod, targetFeatures);
 
   auto triple = getTargetTriple(mod);
@@ -61,6 +63,7 @@ TEST_F(StringAttributesTests, moduleStringAttrTest) {
   EXPECT_TRUE(mapStr.find("r62:X86_FP80") != std::string::npos);
 
   EXPECT_EQ(getTargetCPU(mod), targetCPU);
+  EXPECT_EQ(getTuneCPU(mod), tuneCPU);
 
   auto features = getTargetFeatures(mod);
   auto featuresList = features.getFeatures();
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
index b05366d2a635d..2140da3518aed 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -58,6 +58,15 @@ def FramePointerKindAttr : LLVM_Attr<"FramePointerKind", "framePointerKind"> {
   let assemblyFormat = "`<` $framePointerKind `>`";
 }
 
+//===----------------------------------------------------------------------===//
+// TuneCPUAttr
+//===----------------------------------------------------------------------===//
+
+//def TuneCPUAttr : LLVM_Attr<"TuneCPU", "tuneCPU"> {
+  //let parameters = (ins "tuneCPU::tuneCPU":$tuneCPU);
+  //let assemblyFormat = "`<` $tuneCPU `>`";
+//}
+
 //===----------------------------------------------------------------------===//
 // Loop Attributes
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index f6f907f39a4b4..d7eed6d4ab7be 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1447,6 +1447,7 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [
     OptionalAttr<LLVM_VScaleRangeAttr>:$vscale_range,
     OptionalAttr<FramePointerKindAttr>:$frame_pointer,
     OptionalAttr<StrAttr>:$target_cpu,
+    OptionalAttr<StrAttr>:$tune_cpu,
     OptionalAttr<LLVM_TargetFeaturesAttr>:$target_features,
     OptionalAttr<BoolAttr>:$unsafe_fp_math,
     OptionalAttr<BoolAttr>:$no_infs_fp_math,
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index 191b84acd56fa..68cddad20a6f1 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -1676,6 +1676,7 @@ static constexpr std::array ExplicitAttributes{
     StringLiteral("vscale_range"),
     StringLiteral("frame-pointer"),
     StringLiteral("target-features"),
+    StringLiteral("tune-cpu"),
     StringLiteral("unsafe-fp-math"),
     StringLiteral("no-infs-fp-math"),
     StringLiteral("no-nans-fp-math"),
@@ -1786,6 +1787,10 @@ void ModuleImport::processFunctionAttributes(llvm::Function *func,
       attr.isStringAttribute())
     funcOp.setTargetCpuAttr(StringAttr::get(context, attr.getValueAsString()));
 
+  if (llvm::Attribute attr = func->getFnAttribute("tune-cpu");
+      attr.isStringAttribute())
+    funcOp.setTuneCpuAttr(StringAttr::get(context, attr.getValueAsString()));
+
   if (llvm::Attribute attr = func->getFnAttribute("target-features");
       attr.isStringAttribute())
     funcOp.setTargetFeaturesAttr(
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 7b86b250c294b..6c22018ad40ee 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1325,6 +1325,9 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) {
   if (auto targetCpu = func.getTargetCpu())
     llvmFunc->addFnAttr("target-cpu", *targetCpu);
 
+  if (auto tuneCpu = func.getTuneCpu())
+    llvmFunc->addFnAttr("tune-cpu", *tuneCpu);
+
   if (auto targetFeatures = func.getTargetFeatures())
     llvmFunc->addFnAttr("target-features", targetFeatures->getFeaturesString());
 

>From 5955c3e9ddadb8c7b312794a7dd589adfeb7216c Mon Sep 17 00:00:00 2001
From: Alexis Perry-Holby <AlexisPerry at users.noreply.github.com>
Date: Mon, 10 Jun 2024 15:19:41 -0600
Subject: [PATCH 2/8] clang-format edit

---
 clang/lib/Driver/ToolChains/Flang.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 3dc7ee0ea2bff..fa5ad1e50bc66 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -15,9 +15,9 @@
 #include "llvm/Frontend/Debug/Options.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include "llvm/TargetParser/Host.h"
 #include "llvm/TargetParser/RISCVISAInfo.h"
 #include "llvm/TargetParser/RISCVTargetParser.h"
-#include "llvm/TargetParser/Host.h"
 
 #include <cassert>
 

>From 2130e8ba712d3b252275c59ddf18ca37d873b86a Mon Sep 17 00:00:00 2001
From: Alexis Perry-Holby <aperry at lanl.gov>
Date: Tue, 11 Jun 2024 09:20:56 -0600
Subject: [PATCH 3/8] Address review comments - code cleanup

---
 flang/tools/bbc/bbc.cpp                          | 2 +-
 mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 9 ---------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 44bddde35c103..c01dbd200af49 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -371,7 +371,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
       ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),
       semanticsContext.targetCharacteristics(), parsing.allCooked(),
       targetTriple, kindMap, loweringOptions, envDefaults,
-      semanticsContext.languageFeatures(), targetMachine, ""); // FIXME
+      semanticsContext.languageFeatures(), targetMachine, "");
   mlir::ModuleOp mlirModule = burnside.getModule();
   if (enableOpenMP) {
     if (enableOpenMPGPU && !enableOpenMPDevice) {
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
index 2140da3518aed..b05366d2a635d 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -58,15 +58,6 @@ def FramePointerKindAttr : LLVM_Attr<"FramePointerKind", "framePointerKind"> {
   let assemblyFormat = "`<` $framePointerKind `>`";
 }
 
-//===----------------------------------------------------------------------===//
-// TuneCPUAttr
-//===----------------------------------------------------------------------===//
-
-//def TuneCPUAttr : LLVM_Attr<"TuneCPU", "tuneCPU"> {
-  //let parameters = (ins "tuneCPU::tuneCPU":$tuneCPU);
-  //let assemblyFormat = "`<` $tuneCPU `>`";
-//}
-
 //===----------------------------------------------------------------------===//
 // Loop Attributes
 //===----------------------------------------------------------------------===//

>From d5d0981e65465910acd4435d33c34e357f816420 Mon Sep 17 00:00:00 2001
From: Alexis Perry-Holby <aperry at lanl.gov>
Date: Wed, 12 Jun 2024 09:21:31 -0600
Subject: [PATCH 4/8] added mtune tests

---
 flang/test/Lower/tune-cpu-fir.f90  | 13 +++++++++++++
 flang/test/Lower/tune-cpu-llvm.f90 |  6 ++++++
 2 files changed, 19 insertions(+)
 create mode 100644 flang/test/Lower/tune-cpu-fir.f90
 create mode 100644 flang/test/Lower/tune-cpu-llvm.f90

diff --git a/flang/test/Lower/tune-cpu-fir.f90 b/flang/test/Lower/tune-cpu-fir.f90
new file mode 100644
index 0000000000000..c9a34b358e904
--- /dev/null
+++ b/flang/test/Lower/tune-cpu-fir.f90
@@ -0,0 +1,13 @@
+! REQUIRES: x86-registered-target
+! RUN: %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -target-cpu x86-64 %s -o - | FileCheck %s --check-prefixes=ALL,CPU
+! RUN: %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -tune-cpu pentium4 %s -o - | FileCheck %s --check-prefixes=ALL,TUNE
+! RUN: %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -tune-cpu pentium4 %s -o - | FileCheck %s --check-prefixes=ALL,BOTH
+
+! ALL: module attributes {
+
+! CPU-SAME:      fir.target_cpu = "x86-64"
+
+! TUNE-SAME:     fir.tune_cpu = "pentium4"
+
+! BOTH-SAME: fir.target_cpu = "x86-64"
+! BOTH-SAME: fir.tune_cpu = "pentium4"  
diff --git a/flang/test/Lower/tune-cpu-llvm.f90 b/flang/test/Lower/tune-cpu-llvm.f90
new file mode 100644
index 0000000000000..5349c4d9d8d4b
--- /dev/null
+++ b/flang/test/Lower/tune-cpu-llvm.f90
@@ -0,0 +1,6 @@
+! RUN: %flang -mtune=pentium4 -S -emit-llvm %s -o - | FileCheck %s
+
+!CHECK: attributes #0 = {
+!CHECK: "tune-cpu"="pentium4"
+subroutine a
+end subroutine a

>From a4306731f2d75116843d47d630aa9c5e0113deaf Mon Sep 17 00:00:00 2001
From: Alexis Perry-Holby <aperry at lanl.gov>
Date: Wed, 12 Jun 2024 10:52:45 -0600
Subject: [PATCH 5/8] updates to mtune tests following reviewer feedback

---
 flang/test/Lower/tune-cpu-fir.f90  | 3 ++-
 flang/test/Lower/tune-cpu-llvm.f90 | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/tune-cpu-fir.f90 b/flang/test/Lower/tune-cpu-fir.f90
index c9a34b358e904..2db1935e42a21 100644
--- a/flang/test/Lower/tune-cpu-fir.f90
+++ b/flang/test/Lower/tune-cpu-fir.f90
@@ -6,7 +6,8 @@
 ! ALL: module attributes {
 
 ! CPU-SAME:      fir.target_cpu = "x86-64"
-
+! CPU-NOT:       fir.tune_cpu = "pentium4"
+  
 ! TUNE-SAME:     fir.tune_cpu = "pentium4"
 
 ! BOTH-SAME: fir.target_cpu = "x86-64"
diff --git a/flang/test/Lower/tune-cpu-llvm.f90 b/flang/test/Lower/tune-cpu-llvm.f90
index 5349c4d9d8d4b..aa6594bc76312 100644
--- a/flang/test/Lower/tune-cpu-llvm.f90
+++ b/flang/test/Lower/tune-cpu-llvm.f90
@@ -1,6 +1,6 @@
 ! RUN: %flang -mtune=pentium4 -S -emit-llvm %s -o - | FileCheck %s
 
-!CHECK: attributes #0 = {
-!CHECK: "tune-cpu"="pentium4"
+!CHECK: attributes #{{[0-9]+}} = {
+!CHECK-SAME: "tune-cpu"="pentium4"
 subroutine a
 end subroutine a

>From 489f4eb840ef79fbbd25753ca5b7fef855d127e8 Mon Sep 17 00:00:00 2001
From: Alexis Perry-Holby <aperry at lanl.gov>
Date: Wed, 12 Jun 2024 12:43:05 -0600
Subject: [PATCH 6/8] Address review comments - variable name change

---
 flang/include/flang/Frontend/TargetOptions.h | 2 +-
 flang/lib/Frontend/CompilerInvocation.cpp    | 2 +-
 flang/lib/Frontend/FrontendActions.cpp       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Frontend/TargetOptions.h b/flang/include/flang/Frontend/TargetOptions.h
index a7a7192c55cb1..fa72c77a028a1 100644
--- a/flang/include/flang/Frontend/TargetOptions.h
+++ b/flang/include/flang/Frontend/TargetOptions.h
@@ -33,7 +33,7 @@ class TargetOptions {
   std::string cpu;
 
   /// If given, the name of the target CPU to tune code for.
-  std::string tuneCPU;
+  std::string cpuToTuneFor;
 
   /// The list of target specific features to enable or disable, as written on
   /// the command line.
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 13fda2ec6e035..e0f4075134c71 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -404,7 +404,7 @@ static void parseTargetArgs(TargetOptions &opts, llvm::opt::ArgList &args) {
 
   if (const llvm::opt::Arg *a =
           args.getLastArg(clang::driver::options::OPT_tune_cpu))
-    opts.tuneCPU = a->getValue();
+    opts.cpuToTuneFor = a->getValue();
 
   for (const llvm::opt::Arg *currentArg :
        args.filtered(clang::driver::options::OPT_target_feature))
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index a01151dd6346b..4ccde19d1f948 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -293,7 +293,7 @@ bool CodeGenAction::beginSourceFileAction() {
       kindMap, ci.getInvocation().getLoweringOpts(),
       ci.getInvocation().getFrontendOpts().envDefaults,
       ci.getInvocation().getFrontendOpts().features, targetMachine,
-      ci.getInvocation().getTargetOpts().tuneCPU);
+      ci.getInvocation().getTargetOpts().cpuToTuneFor);
 
   // Fetch module from lb, so we can set
   mlirModule = std::make_unique<mlir::ModuleOp>(lb.getModule());

>From 0fe4f9237bffca18f7b5499a1897d3ecc3054ba3 Mon Sep 17 00:00:00 2001
From: Alexis Perry-Holby <aperry at lanl.gov>
Date: Wed, 12 Jun 2024 13:02:28 -0600
Subject: [PATCH 7/8] added mlir import/export tests for tune-cpu

---
 mlir/test/Target/LLVMIR/Import/tune-cpu.ll | 9 +++++++++
 mlir/test/Target/LLVMIR/tune-cpu.mlir      | 7 +++++++
 2 files changed, 16 insertions(+)
 create mode 100644 mlir/test/Target/LLVMIR/Import/tune-cpu.ll
 create mode 100644 mlir/test/Target/LLVMIR/tune-cpu.mlir

diff --git a/mlir/test/Target/LLVMIR/Import/tune-cpu.ll b/mlir/test/Target/LLVMIR/Import/tune-cpu.ll
new file mode 100644
index 0000000000000..9607da82c298a
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/Import/tune-cpu.ll
@@ -0,0 +1,9 @@
+; RUN: mlir-translate -import-llvm -split-input-file %s | FileCheck %s
+
+; CHECK-LABEL: llvm.func @tune_cpu()
+; CHECK-SAME: tune_cpu = "pentium4"
+define void @tune_cpu() #0 {
+  ret void
+}
+
+attributes #0 = { "tune-cpu"="pentium4" }
diff --git a/mlir/test/Target/LLVMIR/tune-cpu.mlir b/mlir/test/Target/LLVMIR/tune-cpu.mlir
new file mode 100644
index 0000000000000..06268022fb235
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/tune-cpu.mlir
@@ -0,0 +1,7 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK: define void @tune_cpu() #[[ATTRS:.*]] {
+// CHECK: attributes #[[ATTRS]] = { "tune-cpu"="pentium4" }
+llvm.func @tune_cpu() attributes {tune_cpu = "pentium4"} {
+  llvm.return
+}

>From bc07fbc575489db8aee856169e5fd6032803e654 Mon Sep 17 00:00:00 2001
From: Alexis Perry-Holby <aperry at lanl.gov>
Date: Wed, 12 Jun 2024 13:09:49 -0600
Subject: [PATCH 8/8] Address review comments - increase clarity in bbc default
 lowering

---
 flang/tools/bbc/bbc.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index c01dbd200af49..c027627975776 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -367,11 +367,12 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
   loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR);
   loweringOptions.setNSWOnLoopVarInc(setNSW);
   std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {};
+  constexpr const char* tuneCPU = "";
   auto burnside = Fortran::lower::LoweringBridge::create(
       ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),
       semanticsContext.targetCharacteristics(), parsing.allCooked(),
       targetTriple, kindMap, loweringOptions, envDefaults,
-      semanticsContext.languageFeatures(), targetMachine, "");
+      semanticsContext.languageFeatures(), targetMachine, tuneCPU);
   mlir::ModuleOp mlirModule = burnside.getModule();
   if (enableOpenMP) {
     if (enableOpenMPGPU && !enableOpenMPDevice) {



More information about the cfe-commits mailing list