[Mlir-commits] [mlir] [llvm] [OpenMPIRBuilder] Do not call host runtime for GPU teams codegen (PR #79984)

Dominik Adamski llvmlistbot at llvm.org
Tue Jan 30 05:05:45 PST 2024


https://github.com/DominikAdamski updated https://github.com/llvm/llvm-project/pull/79984

>From 4b7edb661e19bfd3dac7f374e4f40709d2b46b91 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 30 Jan 2024 04:39:18 -0600
Subject: [PATCH 1/3] [OpenMPIRBuilder] Do not call host runtime for GPU teams
 codegen

This patch ensures that host runtime functions are not called
when handling the OpenMP teams construct on the device.

GPU code for the pragma `omp target teams distribute parallel do`
will require only one call to the OpenMP loop-worksharing runtime.
Support for that pragma will be added later.

This patch does not include the changes required for handling
`omp target teams` on the host side.
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  9 +++++--
 .../Target/LLVMIR/omptarget-teams-llvm.mlir   | 25 +++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 211281452de2..9ca45b0ba2f6 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6251,8 +6251,9 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
   BasicBlock *AllocaBB =
       splitBB(Builder, /*CreateBranch=*/true, "teams.alloca");
 
+  bool SubClausesPresent = (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
   // Push num_teams
-  if (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr) {
+  if (!Config.isTargetDevice() && SubClausesPresent) {
     assert((NumTeamsLower == nullptr || NumTeamsUpper != nullptr) &&
            "if lowerbound is non-null, then upperbound must also be non-null "
            "for bounds on num_teams");
@@ -6305,7 +6306,7 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
   OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
       Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true));
 
-  OI.PostOutlineCB = [this, Ident, ToBeDeleted](Function &OutlinedFn) mutable {
+  auto HostPostOutlineCB = [this, Ident, ToBeDeleted](Function &OutlinedFn) mutable {
     // The stale call instruction will be replaced with a new call instruction
     // for runtime call with the outlined function.
 
@@ -6342,6 +6343,10 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
     }
   };
 
+  if (!Config.isTargetDevice()) {
+    OI.PostOutlineCB = HostPostOutlineCB;
+  }
+
   addOutlineInfo(std::move(OI));
 
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir
new file mode 100644
index 000000000000..c4611a826539
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir
@@ -0,0 +1,25 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// The aim of the test is to check the LLVM IR codegen for the device
+// for omp teams construct
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true, omp.target = #omp.target<target_cpu = "gfx90a", target_features = "">} {
+  llvm.func @foo(i32)
+  llvm.func @omp_target_teams_shared_simple(%arg0 : i32)  {
+    omp.teams {
+      llvm.call @foo(%arg0) : (i32) -> ()
+      omp.terminator
+    }
+  llvm.return
+  }
+}
+
+// CHECK-LABEL: @omp_target_teams_shared_simple
+// CHECK-SAME: (i32 [[ARG0:%.+]])
+// CHECK: call void @[[OUTLINED_FN:.*]](
+// CHECK-NOT: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[OUTLINED_FN:.+]], ptr [[STRUCT_ARG:.*]])
+// CHECK: ret void
+
+//CHECK: define internal void @[[OUTLINED_FN]](
+//CHECK: call void @foo(i32 %[[FOO_ARG:.*]])
+//CHECK: ret void

>From ef59570f1a65c4f27d1952e89ec70e64effb1f4c Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 30 Jan 2024 05:26:32 -0600
Subject: [PATCH 2/3] Clang format

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 9ca45b0ba2f6..030bbc18acfa 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6251,7 +6251,8 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
   BasicBlock *AllocaBB =
       splitBB(Builder, /*CreateBranch=*/true, "teams.alloca");
 
-  bool SubClausesPresent = (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
+  bool SubClausesPresent =
+      (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
   // Push num_teams
   if (!Config.isTargetDevice() && SubClausesPresent) {
     assert((NumTeamsLower == nullptr || NumTeamsUpper != nullptr) &&
@@ -6306,7 +6307,8 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
   OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
       Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true));
 
-  auto HostPostOutlineCB = [this, Ident, ToBeDeleted](Function &OutlinedFn) mutable {
+  auto HostPostOutlineCB = [this, Ident,
+                            ToBeDeleted](Function &OutlinedFn) mutable {
     // The stale call instruction will be replaced with a new call instruction
     // for runtime call with the outlined function.
 

>From 95c186093c4a059f7bc5ab6989380859cbae52b1 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 30 Jan 2024 07:05:25 -0600
Subject: [PATCH 3/3] Applied remarks

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp         | 3 +--
 mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 030bbc18acfa..8eb8a13686dd 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6345,9 +6345,8 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
     }
   };
 
-  if (!Config.isTargetDevice()) {
+  if (!Config.isTargetDevice())
     OI.PostOutlineCB = HostPostOutlineCB;
-  }
 
   addOutlineInfo(std::move(OI));
 
diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir
index c4611a826539..96cced7a1d58 100644
--- a/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir
@@ -3,7 +3,7 @@
 // The aim of the test is to check the LLVM IR codegen for the device
 // for omp teams construct
 
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true, omp.target = #omp.target<target_cpu = "gfx90a", target_features = "">} {
+module attributes {omp.is_target_device = true} {
   llvm.func @foo(i32)
   llvm.func @omp_target_teams_shared_simple(%arg0 : i32)  {
     omp.teams {
@@ -17,7 +17,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
 // CHECK-LABEL: @omp_target_teams_shared_simple
 // CHECK-SAME: (i32 [[ARG0:%.+]])
 // CHECK: call void @[[OUTLINED_FN:.*]](
-// CHECK-NOT: call void {{.+}} @__kmpc_fork_teams(ptr @{{.+}}, i32 1, ptr [[OUTLINED_FN:.+]], ptr [[STRUCT_ARG:.*]])
+// CHECK-NOT: call {{.+}} @__kmpc_fork_teams
 // CHECK: ret void
 
 //CHECK: define internal void @[[OUTLINED_FN]](



More information about the Mlir-commits mailing list