[Mlir-commits] [mlir] [OpenMP][LLVM] Fix access to reduction args of `omp.parallel`. (PR #96426)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Sun Jun 23 03:22:37 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-flang-openmp
Author: Kareem Ergawy (ergawy)
<details>
<summary>Changes</summary>
Fix for Fujitsu test suite test: 0275_0032.f90. The MLIR to LLVM translation logic assumed that reduction arguments to an `omp.parallel` op are always the last set of arguments to the op. However, this is a wrong assumption since private args come afterward.
---
Full diff: https://github.com/llvm/llvm-project/pull/96426.diff
3 Files Affected:
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td (+4)
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+17-14)
- (modified) mlir/test/Target/LLVMIR/openmp-private.mlir (+50)
``````````diff
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index f28911adccd02..633a69c5893f9 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -179,6 +179,10 @@ def ParallelOp : OpenMP_Op<"parallel", [
OpBuilder<(ins CArg<"const ParallelClauseOps &">:$clauses)>
];
let extraClassDeclaration = [{
+ unsigned getNumAllocateVars() { return getAllocateVars().size(); }
+
+ unsigned getNumAllocatorsVars() { return getAllocatorsVars().size(); }
+
/// Returns the number of reduction variables.
unsigned getNumReductionVars() { return getReductionVars().size(); }
}];
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 52c780f655834..4d6c900f0fe3a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -773,8 +773,8 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
/// Allocate space for privatized reduction variables.
template <typename T>
static void allocByValReductionVars(
- T loop, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
+ T loop, MutableArrayRef<BlockArgument> reductionArgs,
+ llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
SmallVectorImpl<llvm::Value *> &privateReductionVariables,
@@ -782,15 +782,13 @@ static void allocByValReductionVars(
llvm::ArrayRef<bool> isByRefs) {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
- auto args =
- loop.getRegion().getArguments().take_back(loop.getNumReductionVars());
for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
if (isByRefs[i])
continue;
llvm::Value *var = builder.CreateAlloca(
moduleTranslation.convertType(reductionDecls[i].getType()));
- moduleTranslation.mapValue(args[i], var);
+ moduleTranslation.mapValue(reductionArgs[i], var);
privateReductionVariables[i] = var;
reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
}
@@ -925,14 +923,17 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<llvm::Value *> privateReductionVariables(
wsloopOp.getNumReductionVars());
DenseMap<Value, llvm::Value *> reductionVariableMap;
- allocByValReductionVars(wsloopOp, builder, moduleTranslation, allocaIP,
- reductionDecls, privateReductionVariables,
+
+ MutableArrayRef<BlockArgument> reductionArgs =
+ wsloopOp.getRegion().getArguments();
+
+ allocByValReductionVars(wsloopOp, reductionArgs, builder, moduleTranslation,
+ allocaIP, reductionDecls, privateReductionVariables,
reductionVariableMap, isByRef);
// Before the loop, store the initial values of reductions into reduction
// variables. Although this could be done after allocas, we don't want to mess
// up with the alloca insertion point.
- ArrayRef<BlockArgument> reductionArgs = wsloopOp.getRegion().getArguments();
for (unsigned i = 0; i < wsloopOp.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;
@@ -1158,16 +1159,18 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
// Allocate reduction vars
DenseMap<Value, llvm::Value *> reductionVariableMap;
- allocByValReductionVars(opInst, builder, moduleTranslation, allocaIP,
- reductionDecls, privateReductionVariables,
- reductionVariableMap, isByRef);
- // Initialize reduction vars
- builder.restoreIP(allocaIP);
MutableArrayRef<BlockArgument> reductionArgs =
- opInst.getRegion().getArguments().take_back(
+ opInst.getRegion().getArguments().slice(
+ opInst.getNumAllocateVars() + opInst.getNumAllocatorsVars(),
opInst.getNumReductionVars());
+ allocByValReductionVars(opInst, reductionArgs, builder, moduleTranslation,
+ allocaIP, reductionDecls, privateReductionVariables,
+ reductionVariableMap, isByRef);
+
+ // Initialize reduction vars
+ builder.restoreIP(allocaIP);
llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
allocaIP =
InsertPointTy(allocaIP.getBlock(),
diff --git a/mlir/test/Target/LLVMIR/openmp-private.mlir b/mlir/test/Target/LLVMIR/openmp-private.mlir
index 141724eaf56f9..e8bfacdd74c32 100644
--- a/mlir/test/Target/LLVMIR/openmp-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-private.mlir
@@ -184,3 +184,53 @@ llvm.mlir.global linkonce constant @_QQfoo() {addr_space = 0 : i32} : !llvm.arra
}
llvm.func @bar(!llvm.ptr)
+
+
+// Tests fix for Fujitsu test suite test: 0275_0032.f90. The MLIR to LLVM
+// translation logic assumed that reduction arguments to an `omp.parallel`
+// op are always the last set of arguments to the op. However, this is a
+// wrong assumption since private args come afterward. This tests the fix
+// that we access the different sets of args properly.
+
+// CHECK-LABEL: define internal void @private_and_reduction_..omp_par
+// CHECK: %[[RED_ALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
+// CHECK: %[[PRV_ALLOC:.*]] = alloca float, i64 1, align 4
+
+// CHECK: omp.par.region:
+// CHECK: br label %[[PAR_REG_BEG:.*]]
+// CHECK: [[PAR_REG_BEG]]:
+// CHECK-NEXT: %{{.*}} = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[RED_ALLOC]], align 8
+// CHECK-NEXT: store float 8.000000e+00, ptr %[[PRV_ALLOC]], align 4
+
+llvm.func @private_and_reduction_() attributes {fir.internal_name = "_QPprivate_and_reduction", frame_pointer = #llvm.framePointerKind<all>, target_cpu = "x86-64"} {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr
+ %2 = llvm.alloca %0 x f32 {bindc_name = "to_priv"} : (i64) -> !llvm.ptr
+ omp.parallel reduction(byref @reducer.part %1 -> %arg0 : !llvm.ptr) private(@privatizer.part %2 -> %arg1 : !llvm.ptr) {
+ %3 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %4 = llvm.mlir.constant(8.000000e+00 : f32) : f32
+ llvm.store %4, %arg1 : f32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+
+omp.private {type = private} @privatizer.part : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x f32 {bindc_name = "to_priv", pinned} : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
+}
+
+omp.declare_reduction @reducer.part : !llvm.ptr init {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
+} combiner {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ omp.yield(%arg0 : !llvm.ptr)
+} cleanup {
+^bb0(%arg0: !llvm.ptr):
+ omp.yield
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/96426
More information about the Mlir-commits
mailing list