[mlir] [llvm] [OMPIRBuilder] Do not call __kmpc_push_num_threads for device parallel (PR #71934)

Dominik Adamski via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 10 04:52:25 PST 2023


https://github.com/DominikAdamski created https://github.com/llvm/llvm-project/pull/71934

The function __kmpc_push_num_threads should be called only when the number of threads is specified for a host parallel region.

For a device parallel region, the number of threads specified by the user should instead be passed as one of the arguments of the __kmpc_parallel_51 function.

>From f07530facc8532fe5ecc7351d5ff5bb7973c4dd4 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Fri, 10 Nov 2023 05:34:07 -0600
Subject: [PATCH] [OMPIRBuilder] Do not call __kmpc_push_num_threads for device
 parallel

The function __kmpc_push_num_threads should be called only when
the number of threads is specified for a host parallel region.
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  5 +++--
 .../LLVMIR/omptarget-parallel-llvm.mlir       | 21 +++++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index eb6c120b2a6dcbd..6be58d70648f4db 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1305,8 +1305,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
   // function arguments are declared in zero address space
   bool ArgsInZeroAddressSpace = Config.isTargetDevice();
 
-  if (NumThreads) {
-    // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
+  // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
+  // only if we compile for host side.
+  if (NumThreads && !Config.isTargetDevice()) {
     Value *Args[] = {
         Ident, ThreadID,
         Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
index 4457d7f4275260d..2628e42d533b50e 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -17,6 +17,21 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     }
   llvm.return
   }
+
+  llvm.func @_test_num_threads(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, omp.outline_parent_name = "_QQmain"} {
+    %0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
+    omp.target map_entries(%0 -> %arg2 : !llvm.ptr) {
+    ^bb0(%arg2: !llvm.ptr):
+      %1 = llvm.mlir.constant(156 : i32) : i32
+      omp.parallel num_threads(%1 : i32) {
+        %2 = llvm.mlir.constant(1 : i32) : i32
+        llvm.store %2, %arg2 : i32, !llvm.ptr
+        omp.terminator
+      }
+    omp.terminator
+    }
+  llvm.return
+  }
 }
 
 // CHECK: define weak_odr protected amdgpu_kernel void [[FUNC0:@.*]](
@@ -43,3 +58,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
 // CHECK: define internal void [[FUNC1]](
 // CHECK-SAME: ptr noalias noundef [[TID_ADDR_ASCAST:%.*]], ptr noalias noundef [[ZERO_ADDR_ASCAST:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
 
+// CHECK: define weak_odr protected amdgpu_kernel void [[FUNC_NUM_THREADS0:@.*]](
+// CHECK-NOT:     call void @__kmpc_push_num_threads(
+// CHECK:         call void @__kmpc_parallel_51(ptr addrspacecast (
+// CHECK-SAME:  ptr addrspace(1) @[[NUM_THREADS_GLOB:[0-9]+]] to ptr),
+// CHECK-SAME:  i32 [[NUM_THREADS_TMP0:%.*]], i32 1, i32 156,
+// CHECK-SAME:  i32 -1,  ptr [[FUNC_NUM_THREADS1:@.*]], ptr null, ptr [[NUM_THREADS_TMP1:%.*]], i64 1)



More information about the llvm-commits mailing list