[Mlir-commits] [mlir] [MLIR][OpenMP] Prevent teams reductions from deadlocking (PR #184625)
Sergio Afonso
llvmlistbot at llvm.org
Wed Mar 4 06:43:26 PST 2026
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/184625
Currently, simple Fortran reductions like the example below cause a deadlock at runtime:
```f90
integer :: i, x
!$omp teams distribute reduction(+:x)
do i=1, 10
x = x + 1
end do
```
This patch fixes the issue by no longer adding a redundant barrier after teams reductions. Synchronization is already being handled by the `__kmpc_reduce` and `__kmpc_end_reduce` runtime calls for the host, and by the OMPIRBuilder-generated `_omp_reduction_inter_warp_copy_func` function for GPUs.
From de283b34c5390403e4b84839c04968702daac841 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <Sergio.AfonsoFumero at amd.com>
Date: Wed, 4 Mar 2026 14:22:40 +0000
Subject: [PATCH] [MLIR][OpenMP] Prevent teams reductions from deadlocking
Currently, simple Fortran reductions like the example below cause a
deadlock at runtime:
```f90
integer :: i, x
!$omp teams distribute reduction(+:x)
do i=1, 10
x = x + 1
end do
```
Preventing a redundant barrier from being added in that case addresses
this issue. Synchronization is already being handled by the `__kmpc_reduce` and
`__kmpc_end_reduce` runtime calls for the host, and by
`_omp_reduction_inter_warp_copy_func` for GPUs.
---
.../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 14 +++++++++-----
.../omptarget-teams-distribute-reduction.mlir | 2 --
.../Target/LLVMIR/omptarget-teams-reduction.mlir | 1 -
.../LLVMIR/openmp-teams-distribute-reduction.mlir | 6 ------
.../test/Target/LLVMIR/openmp-teams-reduction.mlir | 4 ----
5 files changed, 9 insertions(+), 18 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 571575762d54a..3e7a6c88aec3a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1468,14 +1468,18 @@ static LogicalResult createReductionsAndCleanup(
if (!contInsertPoint->getBlock())
return op->emitOpError() << "failed to convert reductions";
- llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
- ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
+ llvm::OpenMPIRBuilder::InsertPointTy afterIP = *contInsertPoint;
+ if (!isTeamsReduction) {
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy barrierIP =
+ ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
- if (failed(handleError(afterIP, *op)))
- return failure();
+ if (failed(handleError(barrierIP, *op)))
+ return failure();
+ afterIP = *barrierIP;
+ }
tempTerminator->eraseFromParent();
- builder.restoreIP(*afterIP);
+ builder.restoreIP(afterIP);
// after the construct, deallocate private reduction variables
SmallVector<Region *> reductionRegions;
diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir
index b7cb1026967f3..8cb6594f9130e 100644
--- a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir
@@ -56,8 +56,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
// CHECK: icmp eq i32 %[[MASTER]], 1
// CHECK: i1 %{{.+}}, label %[[THEN:[A-Za-z0-9_.]*]], label %[[DONE:[A-Za-z0-9_.]*]]
-// CHECK: call void @__kmpc_barrier
-
// CHECK: [[THEN]]:
// CHECK-NEXT: %[[FINAL_LHS:[A-Za-z0-9_.]*]] = load i32
// CHECK-NEXT: %[[FINAL_RHS:[A-Za-z0-9_.]*]] = load i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir
index 36eb280dfcfa2..a6154235ec874 100644
--- a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir
@@ -67,7 +67,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
// CHECK-NEXT: %[[FINAL_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[FINAL_LHS]], %[[FINAL_RHS]]
// CHECK-NEXT: store i32 %[[FINAL_RESULT]]
-// CHECK: call void @__kmpc_barrier
// CHECK: call void @__kmpc_target_deinit
// CHECK: define internal void @[[OUTLINED]]
diff --git a/mlir/test/Target/LLVMIR/openmp-teams-distribute-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-teams-distribute-reduction.mlir
index 9e033f6a4da3c..ac58e07e3ba37 100644
--- a/mlir/test/Target/LLVMIR/openmp-teams-distribute-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-teams-distribute-reduction.mlir
@@ -60,12 +60,6 @@ llvm.func @simple_teams_reduction_() attributes {fir.internal_name = "_QPsimple_
// Non atomic version
// CHECK: call void @__kmpc_end_reduce
-// Finalize
-// CHECK: br label %[[FINALIZE:.+]]
-
-// CHECK: [[FINALIZE]]:
-// CHECK: call void @__kmpc_barrier
-
// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: add i32
diff --git a/mlir/test/Target/LLVMIR/openmp-teams-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-teams-reduction.mlir
index 800a833cf5601..79110565455dd 100644
--- a/mlir/test/Target/LLVMIR/openmp-teams-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-teams-reduction.mlir
@@ -62,12 +62,8 @@ llvm.func @simple_teams_only_reduction_() attributes {fir.internal_name = "_QPsi
// CHECK-SAME: %[[REDARRAY]]
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
-// CHECK: [[FINALIZE:.+]]:
-// CHECK: call void @__kmpc_barrier
-
// Non atomic version
// CHECK: call void @__kmpc_end_reduce
-// CHECK: br label %[[FINALIZE]]
// Atomic version not generated
// CHECK: unreachable
More information about the Mlir-commits
mailing list