[Mlir-commits] [llvm] [mlir] [OpenMPIRBuilder] Cast device num_threads to i32 for __kmpc_parallel_60 (PR #194634)

Abid Qadeer llvmlistbot at llvm.org
Tue Apr 28 08:37:16 PDT 2026


https://github.com/abidh updated https://github.com/llvm/llvm-project/pull/194634

>From 785895535ec007cc54b6421ebd1556f86900b48c Mon Sep 17 00:00:00 2001
From: Abid Qadeer <haqadeer at amd.com>
Date: Tue, 28 Apr 2026 14:40:57 +0100
Subject: [PATCH 1/2] [OpenMPIRBuilder] Cast device num_threads to i32 for
 __kmpc_parallel_60

Fix a crash in device OpenMP lowering when compiling with
`-fdefault-integer-8`. In `targetParallelCallback`, `NumThreads` can be
`i64`, but `__kmpc_parallel_60` expects an `i32` num_threads parameter,
which caused a bad-signature assertion during call creation.

The fix is to use `CreateZExtOrTrunc(..., Int32)` for the `num_threads`
argument before building the runtime call. This matches the handling
used in clang in `CGOpenMPRuntimeGPU::emitParallelCall`.

The problem can be seen with the following testcase when compiled with
`flang -fopenmp --offload-arch=gfx90a test.f90 -fdefault-integer-8`:

program num_threads_i8_target_parallel
  implicit none
  integer :: nthreads
  integer :: i
  nthreads = 137
  !$omp target teams distribute parallel do num_threads(nthreads)
  do i = 1, 1
  end do
  !$omp end target teams distribute parallel do
end program num_threads_i8_target_parallel
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  5 +++-
 .../openmp-target-spmd-num-threads-i64.mlir   | 27 +++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 407ffcf2826df..24e5282d3667f 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1455,13 +1455,16 @@ static void targetParallelCallback(
   Value *Cond =
       IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
                   : Builder.getInt32(1);
+  Value *NumThreadsArg =
+      NumThreads ? Builder.CreateZExtOrTrunc(NumThreads, OMPIRBuilder->Int32)
+                 : Builder.getInt32(-1);
 
   // Build kmpc_parallel_60 call
   Value *Parallel60CallArgs[] = {
       /* identifier*/ Ident,
       /* global thread num*/ ThreadID,
       /* if expression */ Cond,
-      /* number of threads */ NumThreads ? NumThreads : Builder.getInt32(-1),
+      /* number of threads */ NumThreadsArg,
       /* Proc bind */ Builder.getInt32(-1),
       /* outlined function */ &OutlinedFn,
       /* wrapper function */ NullPtrValue,
diff --git a/mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir b/mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir
new file mode 100644
index 0000000000000..126f836fad21e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir
@@ -0,0 +1,27 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+  llvm.func @main(%x : i32) {
+    omp.target host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) {
+      %numThreads = llvm.mlir.constant(137 : i64) : i64
+      omp.teams {
+        omp.parallel num_threads(%numThreads : i64) {
+          omp.distribute {
+            omp.wsloop {
+              omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+                omp.yield
+              }
+            } {omp.composite}
+          } {omp.composite}
+          omp.terminator
+        } {omp.composite}
+        omp.terminator
+      }
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// CHECK: define internal void @[[TARGET_OUTLINE:.*]]({{.*}})
+// CHECK: call void @__kmpc_parallel_60(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 137, i32 {{.*}}, ptr @[[PARALLEL_OUTLINE:.*]], ptr {{.*}}, ptr {{.*}}, i64 {{.*}}, i32 {{.*}})

>From 69ff0044c886657e56b98003581ecd46a141e70e Mon Sep 17 00:00:00 2001
From: Abid Qadeer <haqadeer at amd.com>
Date: Tue, 28 Apr 2026 16:35:04 +0100
Subject: [PATCH 2/2] Handle review comments.

Improve and simplify testcase.
---
 .../openmp-target-spmd-num-threads-i64.mlir   | 24 +++++++------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir b/mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir
index 126f836fad21e..838202456d7b1 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir
@@ -1,20 +1,11 @@
 // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
 module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
-  llvm.func @main(%x : i32) {
-    omp.target host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) {
-      %numThreads = llvm.mlir.constant(137 : i64) : i64
-      omp.teams {
-        omp.parallel num_threads(%numThreads : i64) {
-          omp.distribute {
-            omp.wsloop {
-              omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-                omp.yield
-              }
-            } {omp.composite}
-          } {omp.composite}
-          omp.terminator
-        } {omp.composite}
+  llvm.func @main(%nt : !llvm.ptr) {
+    %map_nt = omp.map.info var_ptr(%nt : !llvm.ptr, i64) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "nthreads"}
+    omp.target map_entries(%map_nt -> %arg_nt : !llvm.ptr) {
+      %num_threads = llvm.load %arg_nt : !llvm.ptr -> i64
+      omp.parallel num_threads(%num_threads : i64) {
         omp.terminator
       }
       omp.terminator
@@ -23,5 +14,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
   }
 }
 
-// CHECK: define internal void @[[TARGET_OUTLINE:.*]]({{.*}})
-// CHECK: call void @__kmpc_parallel_60(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 137, i32 {{.*}}, ptr @[[PARALLEL_OUTLINE:.*]], ptr {{.*}}, ptr {{.*}}, i64 {{.*}}, i32 {{.*}})
+// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_main_l{{[0-9]+}}
+// CHECK: [[NT32:%.*]] = trunc i64 {{.*}} to i32
+// CHECK: call void @__kmpc_parallel_60(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 [[NT32]], i32 {{.*}}, ptr @{{.*}}, ptr {{.*}}, ptr {{.*}}, i64 {{.*}}, i32 {{.*}})



More information about the Mlir-commits mailing list