[flang-commits] [flang] 92bbf61 - [Flang][MLIR][OpenMP] Use function-attached target attributes for OpenMP lowering (#78291)

via flang-commits flang-commits at lists.llvm.org
Fri Feb 2 05:16:42 PST 2024


Author: Sergio Afonso
Date: 2024-02-02T13:16:36Z
New Revision: 92bbf615f50c67030ed536f08cc5bb266e0718db

URL: https://github.com/llvm/llvm-project/commit/92bbf615f50c67030ed536f08cc5bb266e0718db
DIFF: https://github.com/llvm/llvm-project/commit/92bbf615f50c67030ed536f08cc5bb266e0718db.diff

LOG: [Flang][MLIR][OpenMP] Use function-attached target attributes for OpenMP lowering (#78291)

This patch removes the omp.target module attribute, since the
information it held on the target CPU and features is available through
the fir.target_cpu and fir.target_features module attributes. Target
outlining during the MLIR to LLVM IR translation stage is updated, so
that these attributes, at that point available as llvm.func attributes,
are passed along to the newly created function.

Added: 
    mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir

Modified: 
    flang/include/flang/Tools/CrossToolHelpers.h
    flang/lib/Frontend/FrontendActions.cpp
    flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
    flang/test/Lower/OpenMP/target_cpu_features.f90
    mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
    mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
    mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
    mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index b61224ff4f1b3..5c59c99675699 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -109,17 +109,6 @@ void setOffloadModuleInterfaceAttributes(
   }
 }
 
-//  Shares assinging of the OpenMP OffloadModuleInterface and its TargetCPU
-//  attribute accross Flang tools (bbc/flang)
-void setOffloadModuleInterfaceTargetAttribute(mlir::ModuleOp &module,
-    llvm::StringRef targetCPU, llvm::StringRef targetFeatures) {
-  // Should be registered by the OpenMPDialect
-  if (auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(
-          module.getOperation())) {
-    offloadMod.setTarget(targetCPU, targetFeatures);
-  }
-}
-
 void setOpenMPVersionAttribute(mlir::ModuleOp &module, int64_t version) {
   module.getOperation()->setAttr(
       mlir::StringAttr::get(module.getContext(), llvm::Twine{"omp.version"}),

diff  --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 4738f7ba57042..09e861db3ec6d 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -301,9 +301,6 @@ bool CodeGenAction::beginSourceFileAction() {
           Fortran::common::LanguageFeature::OpenMP)) {
     setOffloadModuleInterfaceAttributes(*mlirModule,
                                         ci.getInvocation().getLangOpts());
-    setOffloadModuleInterfaceTargetAttribute(
-        *mlirModule, targetMachine.getTargetCPU(),
-        targetMachine.getTargetFeatureString());
     setOpenMPVersionAttribute(*mlirModule,
                               ci.getInvocation().getLangOpts().OpenMPVersion);
   }

diff  --git a/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90 b/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
index 179b71b3f0cfa..5154782e1ae17 100644
--- a/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
+++ b/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
@@ -1,22 +1,19 @@
 !REQUIRES: amdgpu-registered-target, nvptx-registered-target
-!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
-!RUN: %flang_fc1 -emit-hlfir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s
-
+!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=AMDGCN %s
+!RUN: %flang_fc1 -emit-fir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s
 
 !===============================================================================
 ! Target_Enter Simple
 !===============================================================================
 
-!CHECK: omp.target = #omp.target<target_cpu = "gfx908",
-!CHECK-SAME: target_features = "+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,
-!CHECK-SAME: +dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,
-!CHECK-SAME: +gfx8-insts,+gfx9-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,
-!CHECK-SAME: +wavefrontsize64">
-!NVPTX: omp.target = #omp.target<target_cpu = "sm_80", target_features = "+ptx61,+sm_80">
-!CHECK-LABEL: func.func @_QPomp_target_simple()
-subroutine omp_target_simple
-  ! Directive needed to prevent subroutine from being filtered out when
-  ! compiling for the device.
-  !$omp declare target
-end subroutine omp_target_simple
+!AMDGCN: module attributes {
+!AMDGCN-SAME: fir.target_cpu = "gfx908"
+!AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
+!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
+!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
+!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts",
+!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>
 
+!NVPTX: module attributes {
+!NVPTX-SAME: fir.target_cpu = "sm_80"
+!NVPTX-SAME: fir.target_features = #llvm.target_features<["+ptx61", "+sm_80"]>

diff  --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90
index ea1e5e38fca88..ea8efcf5d256b 100644
--- a/flang/test/Lower/OpenMP/target_cpu_features.f90
+++ b/flang/test/Lower/OpenMP/target_cpu_features.f90
@@ -1,21 +1,19 @@
 !REQUIRES: amdgpu-registered-target, nvptx-registered-target
-!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=AMDGCN %s
 !RUN: %flang_fc1 -emit-hlfir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s
 
 !===============================================================================
 ! Target_Enter Simple
 !===============================================================================
 
-!CHECK: omp.target = #omp.target<target_cpu = "gfx908",
-!CHECK-SAME: target_features = "+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,
-!CHECK-SAME: +dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,
-!CHECK-SAME: +gfx8-insts,+gfx9-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,
-!CHECK-SAME: +wavefrontsize64">
-!NVPTX: omp.target = #omp.target<target_cpu = "sm_80", target_features = "+ptx61,+sm_80">
-!CHECK-LABEL: func.func @_QPomp_target_simple()
-subroutine omp_target_simple
-  ! Directive needed to prevent subroutine from being filtered out when
-  ! compiling for the device.
-  !$omp declare target
-end subroutine omp_target_simple
+!AMDGCN: module attributes {
+!AMDGCN-SAME: fir.target_cpu = "gfx908"
+!AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
+!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
+!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
+!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts",
+!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>
 
+!NVPTX: module attributes {
+!NVPTX-SAME: fir.target_cpu = "sm_80"
+!NVPTX-SAME: fir.target_features = #llvm.target_features<["+ptx61", "+sm_80"]>

diff  --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 96c15e775a302..451828ec4ba77 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -72,15 +72,6 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> {
   let assemblyFormat = "`<` struct(params) `>`";
 }
 
-def TargetAttr : OpenMP_Attr<"Target", "target"> {
-  let parameters = (ins
-    StringRefParameter<>:$target_cpu,
-    StringRefParameter<>:$target_features
-  );
-
-  let assemblyFormat = "`<` struct(params) `>`";
-}
-
 
 class OpenMP_Op<string mnemonic, list<Trait> traits = []> :
       Op<OpenMP_Dialect, mnemonic, traits>;

diff  --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
index 18c5335c63fb9..198a9a2357f2f 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
@@ -205,34 +205,6 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> {
                       assumeTeamsOversubscription, assumeThreadsOversubscription,
                       assumeNoThreadState, assumeNoNestedParallelism, noGPULib, openmpDeviceVersion));
       }]>,
-    InterfaceMethod<
-      /*description=*/[{
-        Get the Target attribute on the current module if it exists
-        and return the attribute, if it doesn't exist it returns a nullptr.
-      }],
-      /*retTy=*/"mlir::omp::TargetAttr",
-      /*methodName=*/"getTarget",
-      (ins), [{}], [{
-        if (Attribute flags = $_op->getAttr("omp.target"))
-          return ::llvm::dyn_cast_or_null<mlir::omp::TargetAttr>(flags);
-        return nullptr;
-      }]>,
-    InterfaceMethod<
-      /*description=*/[{
-        Set the attribute target on the current module with the
-        specified string arguments - name of cpu and corresponding features.
-      }],
-      /*retTy=*/"void",
-      /*methodName=*/"setTarget",
-      (ins "llvm::StringRef":$targetCPU,
-           "llvm::StringRef":$targetFeatures), [{}], [{
-        if (targetCPU.empty())
-          return;
-        $_op->setAttr(("omp." + mlir::omp::TargetAttr::getMnemonic()).str(),
-                  mlir::omp::TargetAttr::get($_op->getContext(),
-                                             targetCPU.str(),
-                                             targetFeatures.str()));
-      }]>,
     InterfaceMethod<
       /*description=*/[{
         Set a StringAttr on the current module containing the host IR file path. This

diff  --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 23e101f1e4527..17ce14fe642be 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2336,6 +2336,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   if (!targetOpSupported(opInst))
     return failure();
 
+  auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
   auto targetOp = cast<omp::TargetOp>(opInst);
   auto &targetRegion = targetOp.getRegion();
   DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
@@ -2345,6 +2346,22 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
   auto bodyCB = [&](InsertPointTy allocaIP,
                     InsertPointTy codeGenIP) -> InsertPointTy {
+    // Forward target-cpu and target-features function attributes from the
+    // original function to the new outlined function.
+    llvm::Function *llvmParentFn =
+        moduleTranslation.lookupFunction(parentFn.getName());
+    llvm::Function *llvmOutlinedFn = codeGenIP.getBlock()->getParent();
+    assert(llvmParentFn && llvmOutlinedFn &&
+           "Both parent and outlined functions must exist at this point");
+
+    if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
+        attr.isStringAttribute())
+      llvmOutlinedFn->addFnAttr(attr);
+
+    if (auto attr = llvmParentFn->getFnAttribute("target-features");
+        attr.isStringAttribute())
+      llvmOutlinedFn->addFnAttr(attr);
+
     builder.restoreIP(codeGenIP);
     unsigned argIndex = 0;
     for (auto &mapOp : mapOperands) {
@@ -2363,7 +2380,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   };
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  StringRef parentName = opInst.getParentOfType<LLVM::LLVMFuncOp>().getName();
+  StringRef parentName = parentFn.getName();
 
   llvm::TargetRegionEntryInfo entryInfo;
 

diff  --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
index 1fccb441f4d59..6c148944e034c 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -3,7 +3,7 @@
 // The aim of the test is to check the LLVM IR codegen for the device
 // for omp target parallel construct
 
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true, omp.target = #omp.target<target_cpu = "gfx90a", target_features = "">} {
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
   llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
     %0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
     omp.target map_entries(%0 -> %arg2 : !llvm.ptr) {

diff  --git a/mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir b/mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir
new file mode 100644
index 0000000000000..fddb799142820
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir
@@ -0,0 +1,23 @@
+// Test that the target_features and target_cpu llvm.func attributes are
+// forwarded to outlined target region functions.
+
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = false} {
+  llvm.func @omp_target_region() attributes {
+    target_cpu = "x86-64",
+    target_features = #llvm.target_features<["+mmx", "+sse"]>
+  } {
+    omp.target {
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// CHECK: define void @omp_target_region() #[[ATTRS:.*]] {
+// CHECK: define internal void @__omp_offloading_{{.*}}_omp_target_region_{{.*}}() #[[ATTRS]] {
+
+// CHECK: attributes #[[ATTRS]] = {
+// CHECK-SAME: "target-cpu"="x86-64"
+// CHECK-SAME: "target-features"="+mmx,+sse"


        


More information about the flang-commits mailing list