[Mlir-commits] [mlir] ba6861c - [OpenMPIRBuilder] Cast device num_threads to i32 for __kmpc_parallel_60 (#194634)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Apr 28 09:46:25 PDT 2026
Author: Abid Qadeer
Date: 2026-04-28T17:46:19+01:00
New Revision: ba6861c2bc30dc02167e2e43cb2dfdaedc2e700a
URL: https://github.com/llvm/llvm-project/commit/ba6861c2bc30dc02167e2e43cb2dfdaedc2e700a
DIFF: https://github.com/llvm/llvm-project/commit/ba6861c2bc30dc02167e2e43cb2dfdaedc2e700a.diff
LOG: [OpenMPIRBuilder] Cast device num_threads to i32 for __kmpc_parallel_60 (#194634)
I observed a crash in device OpenMP lowering when compiling with
`-fdefault-integer-8`. In `targetParallelCallback`, `NumThreads` can be
`i64`, but `__kmpc_parallel_60` expects an `i32` `num_threads`
parameter, which caused a bad-signature assertion during call creation.
The fix is to use `CreateZExtOrTrunc(..., Int32)` for the `num_threads`
argument before building the runtime call. This matches the handling
used in clang in `CGOpenMPRuntimeGPU::emitParallelCall`.
The problem can be seen with the following testcase when compiled with
`flang -fopenmp --offload-arch=gfx90a test.f90 -fdefault-integer-8`
```
program test
implicit none
integer :: nthreads
integer :: i
nthreads = 137
!$omp target teams distribute parallel do num_threads(nthreads)
do i = 1, 1
end do
!$omp end target teams distribute parallel do
end program test
```
Added:
mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir
Modified:
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 24cf5c389d8fe..5a4f12d91d540 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1622,6 +1622,9 @@ static void targetParallelCallback(
Value *Cond =
IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
: Builder.getInt32(1);
+ Value *NumThreadsArg =
+ NumThreads ? Builder.CreateZExtOrTrunc(NumThreads, OMPIRBuilder->Int32)
+ : Builder.getInt32(-1);
// If this is not a Generic kernel, we can skip generating the wrapper.
Value *WrapperFn;
@@ -1635,7 +1638,7 @@ static void targetParallelCallback(
/* identifier*/ Ident,
/* global thread num*/ ThreadID,
/* if expression */ Cond,
- /* number of threads */ NumThreads ? NumThreads : Builder.getInt32(-1),
+ /* number of threads */ NumThreadsArg,
/* Proc bind */ Builder.getInt32(-1),
/* outlined function */ &OutlinedFn,
/* wrapper function */ WrapperFn,
diff --git a/mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir b/mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir
new file mode 100644
index 0000000000000..838202456d7b1
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-spmd-num-threads-i64.mlir
@@ -0,0 +1,19 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
+ llvm.func @main(%nt : !llvm.ptr) {
+ %map_nt = omp.map.info var_ptr(%nt : !llvm.ptr, i64) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "nthreads"}
+ omp.target map_entries(%map_nt -> %arg_nt : !llvm.ptr) {
+ %num_threads = llvm.load %arg_nt : !llvm.ptr -> i64
+ omp.parallel num_threads(%num_threads : i64) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_main_l{{[0-9]+}}
+// CHECK: [[NT32:%.*]] = trunc i64 {{.*}} to i32
+// CHECK: call void @__kmpc_parallel_60(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 [[NT32]], i32 {{.*}}, ptr @{{.*}}, ptr {{.*}}, ptr {{.*}}, i64 {{.*}}, i32 {{.*}})
More information about the Mlir-commits
mailing list