[Mlir-commits] [mlir] [MLIR][OpenMP] Prevent teams reductions from deadlocking (PR #184625)

Sergio Afonso llvmlistbot at llvm.org
Wed Mar 4 06:43:26 PST 2026


https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/184625

Currently, simple Fortran reductions like the example below cause a deadlock at runtime:

```f90
integer :: i, x

!$omp teams distribute reduction(+:x)
do i=1, 10
  x = x + 1
end do
```

This patch fixes the deadlock by no longer emitting a redundant barrier after teams reductions. Synchronization is already handled by the `__kmpc_reduce` and `__kmpc_end_reduce` runtime calls on the host, and by the OMPIRBuilder-generated `_omp_reduction_inter_warp_copy_func` function on GPUs.

>From de283b34c5390403e4b84839c04968702daac841 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <Sergio.AfonsoFumero at amd.com>
Date: Wed, 4 Mar 2026 14:22:40 +0000
Subject: [PATCH] [MLIR][OpenMP] Prevent teams reductions from deadlocking

Currently, simple Fortran reductions like the example below cause a
deadlock at runtime:

```f90
integer :: i, x

!$omp teams distribute reduction(+:x)
do i=1, 10
  x = x + 1
end do
```

This patch fixes the deadlock by no longer emitting a redundant barrier
after teams reductions. Synchronization is already handled by the
`__kmpc_reduce` and `__kmpc_end_reduce` runtime calls on the host, and by
the OMPIRBuilder-generated `_omp_reduction_inter_warp_copy_func` function
on GPUs.
---
 .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp   | 14 +++++++++-----
 .../omptarget-teams-distribute-reduction.mlir      |  2 --
 .../Target/LLVMIR/omptarget-teams-reduction.mlir   |  1 -
 .../LLVMIR/openmp-teams-distribute-reduction.mlir  |  6 ------
 .../test/Target/LLVMIR/openmp-teams-reduction.mlir |  4 ----
 5 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 571575762d54a..3e7a6c88aec3a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1468,14 +1468,18 @@ static LogicalResult createReductionsAndCleanup(
   if (!contInsertPoint->getBlock())
     return op->emitOpError() << "failed to convert reductions";
 
-  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
-      ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
+  llvm::OpenMPIRBuilder::InsertPointTy afterIP = *contInsertPoint;
+  if (!isTeamsReduction) {
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy barrierIP =
+        ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
 
-  if (failed(handleError(afterIP, *op)))
-    return failure();
+    if (failed(handleError(barrierIP, *op)))
+      return failure();
+    afterIP = *barrierIP;
+  }
 
   tempTerminator->eraseFromParent();
-  builder.restoreIP(*afterIP);
+  builder.restoreIP(afterIP);
 
   // after the construct, deallocate private reduction variables
   SmallVector<Region *> reductionRegions;
diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir
index b7cb1026967f3..8cb6594f9130e 100644
--- a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir
@@ -56,8 +56,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
 // CHECK: icmp eq i32 %[[MASTER]], 1
 // CHECK: i1 %{{.+}}, label %[[THEN:[A-Za-z0-9_.]*]], label %[[DONE:[A-Za-z0-9_.]*]]
 
-// CHECK: call void @__kmpc_barrier
-
 // CHECK: [[THEN]]:
 // CHECK-NEXT: %[[FINAL_LHS:[A-Za-z0-9_.]*]] = load i32
 // CHECK-NEXT: %[[FINAL_RHS:[A-Za-z0-9_.]*]] = load i32
diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir
index 36eb280dfcfa2..a6154235ec874 100644
--- a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir
@@ -67,7 +67,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
 // CHECK-NEXT: %[[FINAL_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[FINAL_LHS]], %[[FINAL_RHS]]
 // CHECK-NEXT: store i32 %[[FINAL_RESULT]]
 
-// CHECK: call void @__kmpc_barrier
 // CHECK: call void @__kmpc_target_deinit
 
 // CHECK: define internal void @[[OUTLINED]]
diff --git a/mlir/test/Target/LLVMIR/openmp-teams-distribute-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-teams-distribute-reduction.mlir
index 9e033f6a4da3c..ac58e07e3ba37 100644
--- a/mlir/test/Target/LLVMIR/openmp-teams-distribute-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-teams-distribute-reduction.mlir
@@ -60,12 +60,6 @@ llvm.func @simple_teams_reduction_() attributes {fir.internal_name = "_QPsimple_
 // Non atomic version
 // CHECK: call void @__kmpc_end_reduce
 
-// Finalize
-// CHECK: br label %[[FINALIZE:.+]]
-
-// CHECK: [[FINALIZE]]:
-// CHECK: call void @__kmpc_barrier
-
 // Reduction function.
 // CHECK: define internal void @[[REDFUNC]]
 // CHECK: add i32
diff --git a/mlir/test/Target/LLVMIR/openmp-teams-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-teams-reduction.mlir
index 800a833cf5601..79110565455dd 100644
--- a/mlir/test/Target/LLVMIR/openmp-teams-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-teams-reduction.mlir
@@ -62,12 +62,8 @@ llvm.func @simple_teams_only_reduction_() attributes {fir.internal_name = "_QPsi
 // CHECK-SAME: %[[REDARRAY]]
 // CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
 
-// CHECK: [[FINALIZE:.+]]:
-// CHECK: call void @__kmpc_barrier
-
 // Non atomic version
 // CHECK: call void @__kmpc_end_reduce
-// CHECK: br label %[[FINALIZE]]
 
 // Atomic version not generated
 // CHECK: unreachable



More information about the Mlir-commits mailing list