[Mlir-commits] [flang] [mlir] [Flang][MLIR][OpenMP] Add support for target-cpu and target-features (PR #75344)

Mon Jan 15 05:57:57 PST 2024

https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/75344

>From 45df08a6e1928d649fda0ae5d8e7280a7406c530 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Wed, 13 Dec 2023 12:43:50 +0000
Subject: [PATCH 1/3] [Flang][MLIR][OpenMP] Add support for target-cpu and
 target-features

This patch implements a complete flow to propagate information on the target
CPU and target features, as obtained from Flang's command line arguments, to
LLVM IR. The proposed approach can be summarized as follows:
  - A reference to the `TargetMachine` object is stored in the
  `LoweringBridge`. This enables the addition of the "target_cpu" and
  "llvm.target_features" MLIR discardable attributes to all generated
  `func.func` operations.
  - The Func to LLVM dialect conversion pass is updated to translate instances
  of "llvm.target_features" string attributes into the expected structured
  attribute representation for the corresponding "target_features" argument of
  `llvm.func` operations. The "target_cpu" is passed through by default to a
  new string attribute of that same name added to the `llvm.func` operation.
  - The new "target_cpu" attribute is translated to a "target-cpu" LLVM IR
  function attribute.
  - The existing `omp.target` OpenMP attribute class, which is used to
  represent these two attributes, is removed, together with all uses.
  - The OpenMP to LLVMIR translation is updated to propagate these attributes
  to outlined target regions, so they match the attributes from the function
  they are originally extracted.

This patch is probably best to be split up into different parts to simplify
reviews, but I think it's good to present the whole approach early to see if
there are any conceptual problems with it, as I will only have bandwidth to
split this patch in January.
---
 flang/include/flang/Lower/Bridge.h            | 14 +++++-----
 flang/include/flang/Tools/CrossToolHelpers.h  | 11 --------
 flang/lib/Frontend/FrontendActions.cpp        |  7 +----
 flang/lib/Lower/Bridge.cpp                    | 19 ++++++++++---
 flang/test/Driver/save-mlir-temps.f90         |  6 ++--
 flang/test/Lower/target-features.f90          | 20 +++++++++++++
 flang/tools/bbc/bbc.cpp                       |  3 +-
 mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td   |  1 +
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 10 -------
 .../Dialect/OpenMP/OpenMPOpsInterfaces.td     | 28 -------------------
 mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp | 15 ++++++++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 24 ++++++++++++++--
 mlir/lib/Target/LLVMIR/ModuleImport.cpp       |  5 ++++
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp  |  3 ++
 .../Conversion/FuncToLLVM/convert-funcs.mlir  | 13 +++++++++
 .../one-shot-module-bufferize-analysis.mlir   |  2 +-
 mlir/test/Target/LLVMIR/Import/target-cpu.ll  |  9 ++++++
 .../LLVMIR/omptarget-parallel-llvm.mlir       |  2 +-
 .../LLVMIR/omptarget-target-features.mlir     | 23 +++++++++++++++
 mlir/test/Target/LLVMIR/target-cpu.mlir       |  7 +++++
 20 files changed, 146 insertions(+), 76 deletions(-)
 create mode 100644 flang/test/Lower/target-features.f90
 create mode 100644 mlir/test/Target/LLVMIR/Import/target-cpu.ll
 create mode 100644 mlir/test/Target/LLVMIR/omptarget-target-features.mlir
 create mode 100644 mlir/test/Target/LLVMIR/target-cpu.mlir

diff --git a/flang/include/flang/Lower/Bridge.h b/flang/include/flang/Lower/Bridge.h
index 6c0d14d65edae1..4864a08d9977b8 100644
--- a/flang/include/flang/Lower/Bridge.h
+++ b/flang/include/flang/Lower/Bridge.h
@@ -21,10 +21,7 @@
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Dialect/Support/KindMapping.h"
 #include "mlir/IR/BuiltinOps.h"
-
-namespace llvm {
-class DataLayout;
-} // namespace llvm
+#include "llvm/Target/TargetMachine.h"
 
 namespace Fortran {
 namespace common {
@@ -64,11 +61,11 @@ class LoweringBridge {
          const Fortran::lower::LoweringOptions &loweringOptions,
          const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults,
          const Fortran::common::LanguageFeatureControl &languageFeatures,
-         const llvm::DataLayout *dataLayout = nullptr) {
+         const llvm::TargetMachine &targetMachine) {
     return LoweringBridge(ctx, semanticsContext, defaultKinds, intrinsics,
                           targetCharacteristics, allCooked, triple, kindMap,
                           loweringOptions, envDefaults, languageFeatures,
-                          dataLayout);
+                          targetMachine);
   }
 
   //===--------------------------------------------------------------------===//
@@ -110,6 +107,8 @@ class LoweringBridge {
     return languageFeatures;
   }
 
+  const llvm::TargetMachine &getTargetMachine() const { return targetMachine; }
+
   /// Create a folding context. Careful: this is very expensive.
   Fortran::evaluate::FoldingContext createFoldingContext() const;
 
@@ -147,7 +146,7 @@ class LoweringBridge {
       const Fortran::lower::LoweringOptions &loweringOptions,
       const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults,
       const Fortran::common::LanguageFeatureControl &languageFeatures,
-      const llvm::DataLayout *dataLayout);
+      const llvm::TargetMachine &targetMachine);
   LoweringBridge() = delete;
   LoweringBridge(const LoweringBridge &) = delete;
 
@@ -164,6 +163,7 @@ class LoweringBridge {
   const Fortran::lower::LoweringOptions &loweringOptions;
   const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults;
   const Fortran::common::LanguageFeatureControl &languageFeatures;
+  const llvm::TargetMachine &targetMachine;
 };
 
 } // namespace lower
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index ddec70fa9824c5..9728e09cbc8c13 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -103,17 +103,6 @@ void setOffloadModuleInterfaceAttributes(
   }
 }
 
-//  Shares assinging of the OpenMP OffloadModuleInterface and its TargetCPU
-//  attribute accross Flang tools (bbc/flang)
-void setOffloadModuleInterfaceTargetAttribute(mlir::ModuleOp &module,
-    llvm::StringRef targetCPU, llvm::StringRef targetFeatures) {
-  // Should be registered by the OpenMPDialect
-  if (auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(
-          module.getOperation())) {
-    offloadMod.setTarget(targetCPU, targetFeatures);
-  }
-}
-
 void setOpenMPVersionAttribute(mlir::ModuleOp &module, int64_t version) {
   module.getOperation()->setAttr(
       mlir::StringAttr::get(module.getContext(), llvm::Twine{"omp.version"}),
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index d4a3e164d20739..ede5348067df12 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -283,8 +283,6 @@ bool CodeGenAction::beginSourceFileAction() {
       ci.getSemanticsContext().defaultKinds();
   fir::KindMapping kindMap(mlirCtx.get(), llvm::ArrayRef<fir::KindTy>{
                                               fir::fromDefaultKinds(defKinds)});
-  const llvm::DataLayout &dl = targetMachine.createDataLayout();
-
   lower::LoweringBridge lb = Fortran::lower::LoweringBridge::create(
       *mlirCtx, ci.getSemanticsContext(), defKinds,
       ci.getSemanticsContext().intrinsics(),
@@ -292,7 +290,7 @@ bool CodeGenAction::beginSourceFileAction() {
       ci.getParsing().allCooked(), ci.getInvocation().getTargetOpts().triple,
       kindMap, ci.getInvocation().getLoweringOpts(),
       ci.getInvocation().getFrontendOpts().envDefaults,
-      ci.getInvocation().getFrontendOpts().features, &dl);
+      ci.getInvocation().getFrontendOpts().features, targetMachine);
 
   // Fetch module from lb, so we can set
   mlirModule = std::make_unique<mlir::ModuleOp>(lb.getModule());
@@ -301,9 +299,6 @@ bool CodeGenAction::beginSourceFileAction() {
           Fortran::common::LanguageFeature::OpenMP)) {
     setOffloadModuleInterfaceAttributes(*mlirModule,
                                         ci.getInvocation().getLangOpts());
-    setOffloadModuleInterfaceTargetAttribute(
-        *mlirModule, targetMachine.getTargetCPU(),
-        targetMachine.getTargetFeatureString());
     setOpenMPVersionAttribute(*mlirModule,
                               ci.getInvocation().getLangOpts().OpenMPVersion);
   }
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 7e64adc3c144c9..2c2424a47d28d5 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4326,6 +4326,17 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     assert(blockId == 0 && "invalid blockId");
     assert(activeConstructStack.empty() && "invalid construct stack state");
 
+    // Set target_cpu and target_features attributes to be passed through to the
+    // llvm.func operation during lowering.
+    const llvm::TargetMachine &targetMachine = bridge.getTargetMachine();
+    if (auto targetCPU = targetMachine.getTargetCPU(); !targetCPU.empty())
+      func->setAttr("target_cpu",
+                    mlir::StringAttr::get(func.getContext(), targetCPU));
+    if (auto targetFeatures = targetMachine.getTargetFeatureString();
+        !targetFeatures.empty())
+      func->setAttr("llvm.target_features",
+                    mlir::StringAttr::get(func.getContext(), targetFeatures));
+
     // Manage floating point exception, halting mode, and rounding mode
     // settings at function entry and exit.
     if (!funit.isMainProgram())
@@ -5091,12 +5102,12 @@ Fortran::lower::LoweringBridge::LoweringBridge(
     const Fortran::lower::LoweringOptions &loweringOptions,
     const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults,
     const Fortran::common::LanguageFeatureControl &languageFeatures,
-    const llvm::DataLayout *dataLayout)
+    const llvm::TargetMachine &targetMachine)
     : semanticsContext{semanticsContext}, defaultKinds{defaultKinds},
       intrinsics{intrinsics}, targetCharacteristics{targetCharacteristics},
       cooked{&cooked}, context{context}, kindMap{kindMap},
       loweringOptions{loweringOptions}, envDefaults{envDefaults},
-      languageFeatures{languageFeatures} {
+      languageFeatures{languageFeatures}, targetMachine{targetMachine} {
   // Register the diagnostic handler.
   context.getDiagEngine().registerHandler([](mlir::Diagnostic &diag) {
     llvm::raw_ostream &os = llvm::errs();
@@ -5147,6 +5158,6 @@ Fortran::lower::LoweringBridge::LoweringBridge(
   assert(module.get() && "module was not created");
   fir::setTargetTriple(*module.get(), triple);
   fir::setKindMapping(*module.get(), kindMap);
-  if (dataLayout)
-    fir::support::setMLIRDataLayout(*module.get(), *dataLayout);
+  fir::support::setMLIRDataLayout(*module.get(),
+                                  targetMachine.createDataLayout());
 }
diff --git a/flang/test/Driver/save-mlir-temps.f90 b/flang/test/Driver/save-mlir-temps.f90
index 50bc83030caa91..4fad6c9d9cf1dc 100644
--- a/flang/test/Driver/save-mlir-temps.f90
+++ b/flang/test/Driver/save-mlir-temps.f90
@@ -51,9 +51,9 @@
 ! Content to check from the MLIR outputs
 !--------------------------
 ! MLIR-FIR-NOT: llvm.func
-! MLIR-FIR: func.func @{{.*}}main() {
+! MLIR-FIR: func.func @{{.*}}main()
 
-! MLIR-FIR-NOT: func.func
-! MLIR-LLVMIR: llvm.func @{{.*}}main() {
+! MLIR-LLVMIR-NOT: func.func
+! MLIR-LLVMIR: llvm.func @{{.*}}main()
 
 end program
diff --git a/flang/test/Lower/target-features.f90 b/flang/test/Lower/target-features.f90
new file mode 100644
index 00000000000000..102f89c6de63ab
--- /dev/null
+++ b/flang/test/Lower/target-features.f90
@@ -0,0 +1,20 @@
+! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s --check-prefixes=ALL,NONE
+! RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefixes=ALL,TRIPLE
+! RUN: %flang_fc1 -emit-fir -target-cpu gfx90a %s -o - | FileCheck %s --check-prefixes=ALL,CPU
+! RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx90a %s -o - | FileCheck %s --check-prefixes=ALL,BOTH
+
+! ALL-LABEL: func.func @_QPfoo()
+
+! NONE-NOT: llvm.target_features
+! NONE-NOT: target_cpu
+
+! TRIPLE-NOT: llvm.target_features
+! TRIPLE-SAME: target_cpu = "generic-hsa"
+
+! CPU-NOT: llvm.target_features
+! CPU-SAME: target_cpu = "gfx90a"
+
+! BOTH-SAME: llvm.target_features = "{{[^"]*}}+gfx90a-insts{{[^"]*}}"
+! BOTH-SAME: target_cpu = "gfx90a"
+subroutine foo
+end subroutine
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 0122cf33b0b677..f6defdb745fd8e 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -325,7 +325,6 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
   auto &defKinds = semanticsContext.defaultKinds();
   fir::KindMapping kindMap(
       &ctx, llvm::ArrayRef<fir::KindTy>{fir::fromDefaultKinds(defKinds)});
-  const llvm::DataLayout &dataLayout = targetMachine.createDataLayout();
   std::string targetTriple = targetMachine.getTargetTriple().normalize();
   // Use default lowering options for bbc.
   Fortran::lower::LoweringOptions loweringOptions{};
@@ -336,7 +335,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
       ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),
       semanticsContext.targetCharacteristics(), parsing.allCooked(),
       targetTriple, kindMap, loweringOptions, {},
-      semanticsContext.languageFeatures(), &dataLayout);
+      semanticsContext.languageFeatures(), targetMachine);
   burnside.lower(parseTree, semanticsContext);
   mlir::ModuleOp mlirModule = burnside.getModule();
   if (enableOpenMP) {
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 9e65898154bd65..19366eaea1ee27 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1418,6 +1418,7 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [
     OptionalAttr<I64Attr>:$alignment,
     OptionalAttr<LLVM_VScaleRangeAttr>:$vscale_range,
     OptionalAttr<FramePointerKindAttr>:$frame_pointer,
+    OptionalAttr<StrAttr>:$target_cpu,
     OptionalAttr<LLVM_TargetFeaturesAttr>:$target_features
   );
 
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 8ff5380f71ad45..7ec77e96ed26a9 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -71,16 +71,6 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> {
   let assemblyFormat = "`<` struct(params) `>`";
 }
 
-def TargetAttr : OpenMP_Attr<"Target", "target"> {
-  let parameters = (ins
-    StringRefParameter<>:$target_cpu,
-    StringRefParameter<>:$target_features
-  );
-
-  let assemblyFormat = "`<` struct(params) `>`";
-}
-
-
 class OpenMP_Op<string mnemonic, list<Trait> traits = []> :
       Op<OpenMP_Dialect, mnemonic, traits>;
 
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
index 77001fc816cf91..e9ab4c70d30cd9 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
@@ -204,34 +204,6 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> {
                       assumeTeamsOversubscription, assumeThreadsOversubscription,
                       assumeNoThreadState, assumeNoNestedParallelism, openmpDeviceVersion));
       }]>,
-    InterfaceMethod<
-      /*description=*/[{
-        Get the Target attribute on the current module if it exists
-        and return the attribute, if it doesn't exist it returns a nullptr.
-      }],
-      /*retTy=*/"mlir::omp::TargetAttr",
-      /*methodName=*/"getTarget",
-      (ins), [{}], [{
-        if (Attribute flags = $_op->getAttr("omp.target"))
-          return ::llvm::dyn_cast_or_null<mlir::omp::TargetAttr>(flags);
-        return nullptr;
-      }]>,
-    InterfaceMethod<
-      /*description=*/[{
-        Set the attribute target on the current module with the
-        specified string arguments - name of cpu and corresponding features.
-      }],
-      /*retTy=*/"void",
-      /*methodName=*/"setTarget",
-      (ins "llvm::StringRef":$targetCPU,
-           "llvm::StringRef":$targetFeatures), [{}], [{
-        if (targetCPU.empty())
-          return;
-        $_op->setAttr(("omp." + mlir::omp::TargetAttr::getMnemonic()).str(),
-                  mlir::omp::TargetAttr::get($_op->getContext(),
-                                             targetCPU.str(),
-                                             targetFeatures.str()));
-      }]>,
     InterfaceMethod<
       /*description=*/[{
         Set a StringAttr on the current module containing the host IR file path. This
diff --git a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp
index bd50c67fb87958..552175804ec2c4 100644
--- a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp
+++ b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp
@@ -64,6 +64,7 @@ using namespace mlir;
 static constexpr StringRef varargsAttrName = "func.varargs";
 static constexpr StringRef linkageAttrName = "llvm.linkage";
 static constexpr StringRef barePtrAttrName = "llvm.bareptr";
+static constexpr StringRef targetFeaturesAttrName = "llvm.target_features";
 
 /// Return `true` if the `op` should use bare pointer calling convention.
 static bool shouldUseBarePtrCallConv(Operation *op,
@@ -79,6 +80,7 @@ static void filterFuncAttributes(FunctionOpInterface func,
   for (const NamedAttribute &attr : func->getDiscardableAttrs()) {
     if (attr.getName() == linkageAttrName ||
         attr.getName() == varargsAttrName ||
+        attr.getName() == targetFeaturesAttrName ||
         attr.getName() == LLVM::LLVMDialect::getReadnoneAttrName())
       continue;
     result.push_back(attr);
@@ -379,6 +381,19 @@ mlir::convertFuncOpToLLVMFuncOp(FunctionOpInterface funcOp,
     newFuncOp.setMemoryAttr(memoryAttr);
   }
 
+  // Create target_features attribute.
+  if (funcOp->hasAttr(targetFeaturesAttrName)) {
+    auto attr = funcOp->getAttrOfType<StringAttr>(targetFeaturesAttrName);
+    if (!attr) {
+      funcOp->emitError() << "Contains " << targetFeaturesAttrName
+                          << " attribute not of type StringAttr";
+      return rewriter.notifyMatchFailure(
+          funcOp, "Contains target features attribute not of type StringAttr");
+    }
+    newFuncOp.setTargetFeaturesAttr(
+        LLVM::TargetFeaturesAttr::get(rewriter.getContext(), attr.strref()));
+  }
+
   // Propagate argument/result attributes to all converted arguments/result
   // obtained after converting a given original argument/result.
   if (ArrayAttr resAttrDicts = funcOp.getAllResultAttrs()) {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4f6200d29a70a6..e453351188865b 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2312,6 +2312,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   if (!targetOpSupported(opInst))
     return failure();
 
+  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
   auto targetOp = cast<omp::TargetOp>(opInst);
   auto &targetRegion = targetOp.getRegion();
   DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
@@ -2322,6 +2323,23 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   auto bodyCB = [&](InsertPointTy allocaIP,
                     InsertPointTy codeGenIP) -> InsertPointTy {
     builder.restoreIP(codeGenIP);
+
+    // Forward target-cpu and target-features function attributes from the
+    // original function to the new outlined function.
+    llvm::Function *llvmParentFn =
+        moduleTranslation.lookupFunction(parentFn.getName());
+    llvm::Function *llvmOutlinedFn = codeGenIP.getBlock()->getParent();
+    assert(llvmParentFn && llvmOutlinedFn &&
+           "Both parent and outlined functions must exist at this point");
+
+    if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
+        attr.isStringAttribute())
+      llvmOutlinedFn->addFnAttr(attr);
+
+    if (auto attr = llvmParentFn->getFnAttribute("target-features");
+        attr.isStringAttribute())
+      llvmOutlinedFn->addFnAttr(attr);
+
     unsigned argIndex = 0;
     for (auto &mapOp : mapOperands) {
       auto mapInfoOp =
@@ -2339,11 +2357,11 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   };
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  StringRef parentName = opInst.getParentOfType<LLVM::LLVMFuncOp>().getName();
+  StringRef parentName = parentFn.getName();
 
   // Override parent name if early outlining function
-  if (auto earlyOutlineOp = llvm::dyn_cast<mlir::omp::EarlyOutliningInterface>(
-          opInst.getParentOfType<LLVM::LLVMFuncOp>().getOperation())) {
+  if (auto earlyOutlineOp =
+          llvm::dyn_cast<mlir::omp::EarlyOutliningInterface>(*parentFn)) {
     llvm::StringRef outlineParentName = earlyOutlineOp.getParentName();
     parentName = outlineParentName.empty() ? parentName : outlineParentName;
   }
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index ec2692f58695d0..16d73a8e5e57cc 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -1735,6 +1735,11 @@ void ModuleImport::processFunctionAttributes(llvm::Function *func,
                                  .value()));
   }
 
+  if (llvm::Attribute attr = func->getFnAttribute("target-cpu");
+      attr.isStringAttribute()) {
+    funcOp.setTargetCpuAttr(StringAttr::get(context, attr.getValueAsString()));
+  }
+
   if (llvm::Attribute attr = func->getFnAttribute("target-features");
       attr.isStringAttribute()) {
     funcOp.setTargetFeaturesAttr(
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 3dd082aae19338..20726ab478212f 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -971,6 +971,9 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) {
   if (func.getArmNewZa())
     llvmFunc->addFnAttr("aarch64_pstate_za_new");
 
+  if (auto targetCpu = func.getTargetCpu())
+    llvmFunc->addFnAttr("target-cpu", *targetCpu);
+
   if (auto targetFeatures = func.getTargetFeatures())
     llvmFunc->addFnAttr("target-features", targetFeatures->getFeaturesString());
 
diff --git a/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir b/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir
index 765d8469f3c561..c788ff19d4a550 100644
--- a/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir
+++ b/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir
@@ -61,6 +61,14 @@ func.func @variadic_func(%arg0: i32) attributes { "func.varargs" = true } {
   return
 }
 
+// CHECK-LABEL: llvm.func @target_features()
+// CHECK-SAME: target_features = #llvm.target_features<["+sme", "+sve"]>
+func.func private @target_features() attributes { "llvm.target_features" = "+sme,+sve" }
+
+// CHECK-LABEL: llvm.func @target_cpu()
+// CHECK-SAME: target_cpu = "gfx90a"
+func.func private @target_cpu() attributes { "target_cpu" = "gfx90a" }
+
 // -----
 
 // CHECK-LABEL: llvm.func @private_callee
@@ -86,3 +94,8 @@ func.func private @badllvmlinkage(i32) attributes { "llvm.linkage" = 3 : i64 } /
 func.func @variadic_func(%arg0: i32) attributes { "func.varargs" = true, "llvm.emit_c_interface" } {
   return
 }
+
+// -----
+
+// expected-error at +1{{Contains llvm.target_features attribute not of type StringAttr}}
+func.func private @badllvmtargetfeatures() attributes { "llvm.target_features" = 1 : i64 }
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
index a103e65affacd8..6e7b113aa35c0b 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
@@ -772,7 +772,7 @@ func.func @insert_slice_chain(
 // CHECK-SAME: bufferization.access = "none"
     %arg2: tensor<62x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
 // CHECK-SAME: bufferization.access = "write"
-  -> tensor<62x90xf32> attributes {passthrough = [["target-cpu", "skylake-avx512"], ["prefer-vector-width", "512"]]}
+  -> tensor<62x90xf32> attributes {passthrough = [["prefer-vector-width", "512"]], target_cpu = "skylake-avx512"}
 {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f32
diff --git a/mlir/test/Target/LLVMIR/Import/target-cpu.ll b/mlir/test/Target/LLVMIR/Import/target-cpu.ll
new file mode 100644
index 00000000000000..84c0f96c267a11
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/Import/target-cpu.ll
@@ -0,0 +1,9 @@
+; RUN: mlir-translate -import-llvm -split-input-file %s | FileCheck %s
+
+; CHECK-LABEL: llvm.func @target_cpu()
+; CHECK-SAME: target_cpu = "gfx90a"
+define void @target_cpu() #0 {
+  ret void
+}
+
+attributes #0 = { "target-cpu"="gfx90a" }
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
index c99f2954d76138..5723a48fe38a68 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -3,7 +3,7 @@
 // The aim of the test is to check the LLVM IR codegen for the device
 // for omp target parallel construct
 
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true, omp.target = #omp.target<target_cpu = "gfx90a", target_features = "">} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
   llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QQmain"} {
     %0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
     omp.target map_entries(%0 -> %arg2 : !llvm.ptr) {
diff --git a/mlir/test/Target/LLVMIR/omptarget-target-features.mlir b/mlir/test/Target/LLVMIR/omptarget-target-features.mlir
new file mode 100644
index 00000000000000..79c6718f07db30
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-target-features.mlir
@@ -0,0 +1,23 @@
+// Test that the target_features and target_cpu llvm.func attributes are
+// forwarded to outlined target region functions.
+
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = false} {
+  llvm.func @omp_target_region() attributes {
+    target_cpu = "x86-64",
+    target_features = #llvm.target_features<["+mmx", "+sse"]>
+  } {
+    omp.target {
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// CHECK: define void @omp_target_region() #[[ATTRS1:.*]] {
+// CHECK: define internal void @__omp_offloading_{{.*}}_omp_target_region_{{.*}}() #[[ATTRS1]] {
+
+// CHECK: attributes #[[ATTRS1]] = {
+// CHECK-SAME: "target-cpu"="x86-64"
+// CHECK-SAME: "target-features"="+mmx,+sse"
diff --git a/mlir/test/Target/LLVMIR/target-cpu.mlir b/mlir/test/Target/LLVMIR/target-cpu.mlir
new file mode 100644
index 00000000000000..182ae474394f45
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/target-cpu.mlir
@@ -0,0 +1,7 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK-LABEL: define void @target_cpu
+// CHECK: attributes #{{.*}} = { "target-cpu"="gfx90a" }
+llvm.func @target_cpu() attributes {target_cpu = "gfx90a"} {
+  llvm.return
+}

>From d96e598fe8270a901b963e1cb7a1fdec31185731 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Thu, 14 Dec 2023 11:44:17 +0000
Subject: [PATCH 2/3] Use LLVM::TargetFeaturesAttr to represent target features
 also in func.func

---
 flang/lib/Lower/Bridge.cpp                    |  6 +++--
 .../Lower/OpenMP/FIR/target_cpu_features.f90  | 22 +++++++++++--------
 .../test/Lower/OpenMP/target_cpu_features.f90 | 22 +++++++++++--------
 flang/test/Lower/target-features.f90          | 10 +++++----
 mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp | 15 -------------
 .../Conversion/FuncToLLVM/convert-funcs.mlir  |  7 +-----
 .../LLVMIR/omptarget-target-features.mlir     |  6 ++---
 mlir/test/Target/LLVMIR/target-cpu.mlir       |  4 ++--
 8 files changed, 42 insertions(+), 50 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 2c2424a47d28d5..393522a91ce730 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -54,6 +54,7 @@
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/tools.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Parser/Parser.h"
 #include "mlir/Transforms/RegionUtils.h"
@@ -4332,10 +4333,11 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     if (auto targetCPU = targetMachine.getTargetCPU(); !targetCPU.empty())
       func->setAttr("target_cpu",
                     mlir::StringAttr::get(func.getContext(), targetCPU));
+
     if (auto targetFeatures = targetMachine.getTargetFeatureString();
         !targetFeatures.empty())
-      func->setAttr("llvm.target_features",
-                    mlir::StringAttr::get(func.getContext(), targetFeatures));
+      func->setAttr("target_features", mlir::LLVM::TargetFeaturesAttr::get(
+                                           func.getContext(), targetFeatures));
 
     // Manage floating point exception, halting mode, and rounding mode
     // settings at function entry and exit.
diff --git a/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90 b/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
index 179b71b3f0cfa5..46051e03179e18 100644
--- a/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
+++ b/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
@@ -1,5 +1,5 @@
 !REQUIRES: amdgpu-registered-target, nvptx-registered-target
-!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
+!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=AMDGCN %s
 !RUN: %flang_fc1 -emit-hlfir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s
 
 
@@ -7,16 +7,20 @@
 ! Target_Enter Simple
 !===============================================================================
 
-!CHECK: omp.target = #omp.target<target_cpu = "gfx908",
-!CHECK-SAME: target_features = "+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,
-!CHECK-SAME: +dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,
-!CHECK-SAME: +gfx8-insts,+gfx9-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,
-!CHECK-SAME: +wavefrontsize64">
-!NVPTX: omp.target = #omp.target<target_cpu = "sm_80", target_features = "+ptx61,+sm_80">
-!CHECK-LABEL: func.func @_QPomp_target_simple()
+!AMDGCN-LABEL: func.func @_QPomp_target_simple() attributes {
+!AMDGCN-SAME: target_cpu = "gfx908"
+!AMDGCN-SAME: target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
+!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
+!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
+!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts",
+!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>
+
+!NVPTX-LABEL: func.func @_QPomp_target_simple() attributes {
+!NVPTX-SAME: target_cpu = "sm_80"
+!NVPTX-SAME: target_features = #llvm.target_features<["+ptx61", "+sm_80"]>
+
 subroutine omp_target_simple
   ! Directive needed to prevent subroutine from being filtered out when
   ! compiling for the device.
   !$omp declare target
 end subroutine omp_target_simple
-
diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90
index ea1e5e38fca88e..aa0b049565b99a 100644
--- a/flang/test/Lower/OpenMP/target_cpu_features.f90
+++ b/flang/test/Lower/OpenMP/target_cpu_features.f90
@@ -1,21 +1,25 @@
 !REQUIRES: amdgpu-registered-target, nvptx-registered-target
-!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=AMDGCN %s
 !RUN: %flang_fc1 -emit-hlfir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s
 
 !===============================================================================
 ! Target_Enter Simple
 !===============================================================================
 
-!CHECK: omp.target = #omp.target<target_cpu = "gfx908",
-!CHECK-SAME: target_features = "+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,
-!CHECK-SAME: +dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,
-!CHECK-SAME: +gfx8-insts,+gfx9-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,
-!CHECK-SAME: +wavefrontsize64">
-!NVPTX: omp.target = #omp.target<target_cpu = "sm_80", target_features = "+ptx61,+sm_80">
-!CHECK-LABEL: func.func @_QPomp_target_simple()
+!AMDGCN-LABEL: func.func @_QPomp_target_simple() attributes {
+!AMDGCN-SAME: target_cpu = "gfx908"
+!AMDGCN-SAME: target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
+!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
+!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
+!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts",
+!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>
+
+!NVPTX-LABEL: func.func @_QPomp_target_simple() attributes {
+!NVPTX-SAME: target_cpu = "sm_80"
+!NVPTX-SAME: target_features = #llvm.target_features<["+ptx61", "+sm_80"]>
+
 subroutine omp_target_simple
   ! Directive needed to prevent subroutine from being filtered out when
   ! compiling for the device.
   !$omp declare target
 end subroutine omp_target_simple
-
diff --git a/flang/test/Lower/target-features.f90 b/flang/test/Lower/target-features.f90
index 102f89c6de63ab..237f2b5f23eb26 100644
--- a/flang/test/Lower/target-features.f90
+++ b/flang/test/Lower/target-features.f90
@@ -5,16 +5,18 @@
 
 ! ALL-LABEL: func.func @_QPfoo()
 
-! NONE-NOT: llvm.target_features
 ! NONE-NOT: target_cpu
+! NONE-NOT: target_features
 
-! TRIPLE-NOT: llvm.target_features
 ! TRIPLE-SAME: target_cpu = "generic-hsa"
+! TRIPLE-NOT: target_features
 
-! CPU-NOT: llvm.target_features
 ! CPU-SAME: target_cpu = "gfx90a"
+! CPU-NOT: target_features
 
-! BOTH-SAME: llvm.target_features = "{{[^"]*}}+gfx90a-insts{{[^"]*}}"
 ! BOTH-SAME: target_cpu = "gfx90a"
+! BOTH-SAME: target_features = #llvm.target_features<[
+! BOTH-SAME: "+gfx90a-insts"
+! BOTH-SAME: ]>
 subroutine foo
 end subroutine
diff --git a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp
index 552175804ec2c4..bd50c67fb87958 100644
--- a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp
+++ b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp
@@ -64,7 +64,6 @@ using namespace mlir;
 static constexpr StringRef varargsAttrName = "func.varargs";
 static constexpr StringRef linkageAttrName = "llvm.linkage";
 static constexpr StringRef barePtrAttrName = "llvm.bareptr";
-static constexpr StringRef targetFeaturesAttrName = "llvm.target_features";
 
 /// Return `true` if the `op` should use bare pointer calling convention.
 static bool shouldUseBarePtrCallConv(Operation *op,
@@ -80,7 +79,6 @@ static void filterFuncAttributes(FunctionOpInterface func,
   for (const NamedAttribute &attr : func->getDiscardableAttrs()) {
     if (attr.getName() == linkageAttrName ||
         attr.getName() == varargsAttrName ||
-        attr.getName() == targetFeaturesAttrName ||
         attr.getName() == LLVM::LLVMDialect::getReadnoneAttrName())
       continue;
     result.push_back(attr);
@@ -381,19 +379,6 @@ mlir::convertFuncOpToLLVMFuncOp(FunctionOpInterface funcOp,
     newFuncOp.setMemoryAttr(memoryAttr);
   }
 
-  // Create target_features attribute.
-  if (funcOp->hasAttr(targetFeaturesAttrName)) {
-    auto attr = funcOp->getAttrOfType<StringAttr>(targetFeaturesAttrName);
-    if (!attr) {
-      funcOp->emitError() << "Contains " << targetFeaturesAttrName
-                          << " attribute not of type StringAttr";
-      return rewriter.notifyMatchFailure(
-          funcOp, "Contains target features attribute not of type StringAttr");
-    }
-    newFuncOp.setTargetFeaturesAttr(
-        LLVM::TargetFeaturesAttr::get(rewriter.getContext(), attr.strref()));
-  }
-
   // Propagate argument/result attributes to all converted arguments/result
   // obtained after converting a given original argument/result.
   if (ArrayAttr resAttrDicts = funcOp.getAllResultAttrs()) {
diff --git a/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir b/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir
index c788ff19d4a550..8d471faab16c64 100644
--- a/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir
+++ b/mlir/test/Conversion/FuncToLLVM/convert-funcs.mlir
@@ -63,7 +63,7 @@ func.func @variadic_func(%arg0: i32) attributes { "func.varargs" = true } {
 
 // CHECK-LABEL: llvm.func @target_features()
 // CHECK-SAME: target_features = #llvm.target_features<["+sme", "+sve"]>
-func.func private @target_features() attributes { "llvm.target_features" = "+sme,+sve" }
+func.func private @target_features() attributes { "target_features" = #llvm.target_features<["+sme", "+sve"]> }
 
 // CHECK-LABEL: llvm.func @target_cpu()
 // CHECK-SAME: target_cpu = "gfx90a"
@@ -94,8 +94,3 @@ func.func private @badllvmlinkage(i32) attributes { "llvm.linkage" = 3 : i64 } /
 func.func @variadic_func(%arg0: i32) attributes { "func.varargs" = true, "llvm.emit_c_interface" } {
   return
 }
-
-// -----
-
-// expected-error at +1{{Contains llvm.target_features attribute not of type StringAttr}}
-func.func private @badllvmtargetfeatures() attributes { "llvm.target_features" = 1 : i64 }
diff --git a/mlir/test/Target/LLVMIR/omptarget-target-features.mlir b/mlir/test/Target/LLVMIR/omptarget-target-features.mlir
index 79c6718f07db30..fddb799142820b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-target-features.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-target-features.mlir
@@ -15,9 +15,9 @@ module attributes {omp.is_target_device = false} {
   }
 }
 
-// CHECK: define void @omp_target_region() #[[ATTRS1:.*]] {
-// CHECK: define internal void @__omp_offloading_{{.*}}_omp_target_region_{{.*}}() #[[ATTRS1]] {
+// CHECK: define void @omp_target_region() #[[ATTRS:.*]] {
+// CHECK: define internal void @__omp_offloading_{{.*}}_omp_target_region_{{.*}}() #[[ATTRS]] {
 
-// CHECK: attributes #[[ATTRS1]] = {
+// CHECK: attributes #[[ATTRS]] = {
 // CHECK-SAME: "target-cpu"="x86-64"
 // CHECK-SAME: "target-features"="+mmx,+sse"
diff --git a/mlir/test/Target/LLVMIR/target-cpu.mlir b/mlir/test/Target/LLVMIR/target-cpu.mlir
index 182ae474394f45..80f8172143259a 100644
--- a/mlir/test/Target/LLVMIR/target-cpu.mlir
+++ b/mlir/test/Target/LLVMIR/target-cpu.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
-// CHECK-LABEL: define void @target_cpu
-// CHECK: attributes #{{.*}} = { "target-cpu"="gfx90a" }
+// CHECK: define void @target_cpu() #[[ATTRS:.*]] {
+// CHECK: attributes #[[ATTRS]] = { "target-cpu"="gfx90a" }
 llvm.func @target_cpu() attributes {target_cpu = "gfx90a"} {
   llvm.return
 }

>From 683022fc04eeae7b388401e254f3a928f817c22e Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Mon, 15 Jan 2024 13:53:27 +0000
Subject: [PATCH 3/3] Update Flang unit tests

---
 ...eatures.f90 => target-features-amdgcn.f90} |  1 +
 flang/test/Lower/target-features-x86_64.f90   | 21 +++++++++++++++++++
 2 files changed, 22 insertions(+)
 rename flang/test/Lower/{target-features.f90 => target-features-amdgcn.f90} (95%)
 create mode 100644 flang/test/Lower/target-features-x86_64.f90

diff --git a/flang/test/Lower/target-features.f90 b/flang/test/Lower/target-features-amdgcn.f90
similarity index 95%
rename from flang/test/Lower/target-features.f90
rename to flang/test/Lower/target-features-amdgcn.f90
index 237f2b5f23eb26..8bd6773a43d4e3 100644
--- a/flang/test/Lower/target-features.f90
+++ b/flang/test/Lower/target-features-amdgcn.f90
@@ -1,3 +1,4 @@
+! REQUIRES: amdgpu-registered-target
 ! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s --check-prefixes=ALL,NONE
 ! RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefixes=ALL,TRIPLE
 ! RUN: %flang_fc1 -emit-fir -target-cpu gfx90a %s -o - | FileCheck %s --check-prefixes=ALL,CPU
diff --git a/flang/test/Lower/target-features-x86_64.f90 b/flang/test/Lower/target-features-x86_64.f90
new file mode 100644
index 00000000000000..c4190e83f6a6c7
--- /dev/null
+++ b/flang/test/Lower/target-features-x86_64.f90
@@ -0,0 +1,21 @@
+! REQUIRES: x86-registered-target
+! RUN: %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefixes=ALL,NONE
+! RUN: %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -target-cpu x86-64 %s -o - | FileCheck %s --check-prefixes=ALL,CPU
+! RUN: %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -target-feature +sse %s -o - | FileCheck %s --check-prefixes=ALL,FEATURE
+! RUN: %flang_fc1 -emit-fir -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -target-feature +sse %s -o - | FileCheck %s --check-prefixes=ALL,BOTH
+
+! ALL-LABEL: func.func @_QPfoo()
+
+! NONE-NOT: target_cpu
+! NONE-NOT: target_features
+
+! CPU-SAME: target_cpu = "x86-64"
+! CPU-NOT: target_features
+
+! FEATURE-NOT: target_cpu
+! FEATURE-SAME: target_features = #llvm.target_features<["+sse"]>
+
+! BOTH-SAME: target_cpu = "x86-64"
+! BOTH-SAME: target_features = #llvm.target_features<["+sse"]>
+subroutine foo
+end subroutine