[Mlir-commits] [llvm] [mlir] [MLIR][OpenMP] Extend omp.private materialization support: `dealloc` (PR #90841)
Kareem Ergawy
llvmlistbot at llvm.org
Thu May 2 02:57:48 PDT 2024
https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/90841
Extends current support for delayed privatization during translation to LLVM IR. This adds support for materializing the `dealloc` region in `omp.private` ops when this region contains clean-up/deallocation logic that needs to be executed at the end of the parallel region.
>From c0ec30f8a351b6fd25506ce751c810d39ad366bd Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 1 May 2024 01:39:19 -0500
Subject: [PATCH] [MLIR][OpenMP] Extend omp.private materialization support:
`dealloc`
Extends current support for delayed privatization during translation to
LLVM IR. This adds support for materializing the `dealloc` region in
`omp.private` ops when this region contains clean-up/deallocation logic
that needs to be executed at the end of the parallel region.
---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 26 +++---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 80 ++++++++++++++-----
.../LLVMIR/openmp-omp.private-dealloc.mlir | 53 ++++++++++++
3 files changed, 126 insertions(+), 33 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-omp.private-dealloc.mlir
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 4d2d352f7520b2..9f0c07ef0da910 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1500,19 +1500,6 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
};
}
- // Adjust the finalization stack, verify the adjustment, and call the
- // finalize function a last time to finalize values between the pre-fini
- // block and the exit block if we left the parallel "the normal way".
- auto FiniInfo = FinalizationStack.pop_back_val();
- (void)FiniInfo;
- assert(FiniInfo.DK == OMPD_parallel &&
- "Unexpected finalization stack state!");
-
- Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
-
- InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
- FiniCB(PreFiniIP);
-
OI.OuterAllocaBB = OuterAllocaBlock;
OI.EntryBB = PRegEntryBB;
OI.ExitBB = PRegExitBB;
@@ -1637,6 +1624,19 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
dbgs() << " PBR: " << BB->getName() << "\n";
});
+ // Adjust the finalization stack, verify the adjustment, and call the
+ // finalize function a last time to finalize values between the pre-fini
+ // block and the exit block if we left the parallel "the normal way".
+ auto FiniInfo = FinalizationStack.pop_back_val();
+ (void)FiniInfo;
+ assert(FiniInfo.DK == OMPD_parallel &&
+ "Unexpected finalization stack state!");
+
+ Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
+
+ InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
+ FiniCB(PreFiniIP);
+
// Register the outlined info.
addOutlineInfo(std::move(OI));
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9f87f89d8c636b..6cfe5c038afa42 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -33,6 +33,7 @@
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <any>
+#include <iterator>
#include <optional>
#include <utility>
@@ -878,36 +879,40 @@ static void collectReductionInfo(
}
/// handling of DeclareReductionOp's cleanup region
-static LogicalResult inlineReductionCleanup(
- llvm::SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
- llvm::ArrayRef<llvm::Value *> privateReductionVariables,
- LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder) {
- for (auto [i, reductionDecl] : llvm::enumerate(reductionDecls)) {
- Region &cleanupRegion = reductionDecl.getCleanupRegion();
- if (cleanupRegion.empty())
+static LogicalResult
+inlineOmpRegionCleanup(llvm::SmallVectorImpl<Region *> &cleanupRegions,
+ llvm::ArrayRef<llvm::Value *> privateVariables,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::IRBuilderBase &builder, StringRef regionName,
+ bool shouldLoadCleanupRegionArg = true) {
+ for (auto [i, cleanupRegion] : llvm::enumerate(cleanupRegions)) {
+ if (cleanupRegion->empty())
continue;
// map the argument to the cleanup region
- Block &entry = cleanupRegion.front();
+ Block &entry = cleanupRegion->front();
llvm::Instruction *potentialTerminator =
builder.GetInsertBlock()->empty() ? nullptr
: &builder.GetInsertBlock()->back();
if (potentialTerminator && potentialTerminator->isTerminator())
builder.SetInsertPoint(potentialTerminator);
- llvm::Value *reductionVar = builder.CreateLoad(
- moduleTranslation.convertType(entry.getArgument(0).getType()),
- privateReductionVariables[i]);
+ llvm::Value *prviateVarValue =
+ shouldLoadCleanupRegionArg
+ ? builder.CreateLoad(
+ moduleTranslation.convertType(entry.getArgument(0).getType()),
+ privateVariables[i])
+ : privateVariables[i];
- moduleTranslation.mapValue(entry.getArgument(0), reductionVar);
+ moduleTranslation.mapValue(entry.getArgument(0), prviateVarValue);
- if (failed(inlineConvertOmpRegions(cleanupRegion, "omp.reduction.cleanup",
- builder, moduleTranslation)))
+ if (failed(inlineConvertOmpRegions(*cleanupRegion, regionName, builder,
+ moduleTranslation)))
return failure();
// clear block argument mapping in case it needs to be re-created with a
// different source for another use of the same reduction decl
- moduleTranslation.forgetMapping(cleanupRegion);
+ moduleTranslation.forgetMapping(*cleanupRegion);
}
return success();
}
@@ -1110,8 +1115,14 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
builder.restoreIP(nextInsertionPoint);
// after the workshare loop, deallocate private reduction variables
- return inlineReductionCleanup(reductionDecls, privateReductionVariables,
- moduleTranslation, builder);
+ SmallVector<Region *> reductionRegions;
+ llvm::transform(reductionDecls, std::back_inserter(reductionRegions),
+ [](omp::DeclareReductionOp reductionDecl) {
+ return &reductionDecl.getCleanupRegion();
+ });
+ return inlineOmpRegionCleanup(reductionRegions, privateReductionVariables,
+ moduleTranslation, builder,
+ "omp.reduction.cleanup");
}
/// A RAII class that on construction replaces the region arguments of the
@@ -1267,6 +1278,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
}
};
+ SmallVector<omp::PrivateClauseOp> privatizerClones;
+ SmallVector<llvm::Value *> privateVariables;
+
+
// TODO: Perform appropriate actions according to the data-sharing
// attribute (shared, private, firstprivate, ...) of variables.
// Currently shared and private are supported.
@@ -1356,12 +1371,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
opInst.emitError("failed to inline `alloc` region of an `omp.private` "
"op in the parallel region");
bodyGenStatus = failure();
+ privatizerClone.erase();
} else {
assert(yieldedValues.size() == 1);
replacementValue = yieldedValues.front();
+
+ // Keep the LLVM replacement value and the op clone in case we need to
+ // emit cleanup (i.e. deallocation) logic.
+ privateVariables.push_back(replacementValue);
+ privatizerClones.push_back(privatizerClone);
}
- privatizerClone.erase();
builder.restoreIP(oldIP);
}
@@ -1376,8 +1396,25 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// if the reduction has a cleanup region, inline it here to finalize the
// reduction variables
- if (failed(inlineReductionCleanup(reductionDecls, privateReductionVariables,
- moduleTranslation, builder)))
+ SmallVector<Region *> reductionCleanupRegions;
+ llvm::transform(reductionDecls, std::back_inserter(reductionCleanupRegions),
+ [](omp::DeclareReductionOp reductionDecl) {
+ return &reductionDecl.getCleanupRegion();
+ });
+ if (failed(inlineOmpRegionCleanup(
+ reductionCleanupRegions, privateReductionVariables, moduleTranslation,
+ builder, "omp.reduction.cleanup")))
+ bodyGenStatus = failure();
+
+ SmallVector<Region *> privateCleanupRegions;
+ llvm::transform(privatizerClones, std::back_inserter(privateCleanupRegions),
+ [](omp::PrivateClauseOp privatizer) {
+ return &privatizer.getDeallocRegion();
+ });
+
+ if (failed(inlineOmpRegionCleanup(
+ privateCleanupRegions, privateVariables, moduleTranslation, builder,
+ "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
bodyGenStatus = failure();
builder.restoreIP(oldIP);
@@ -1403,6 +1440,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
ifCond, numThreads, pbKind, isCancellable));
+ for (mlir::omp::PrivateClauseOp privatizerClone : privatizerClones)
+ privatizerClone.erase();
+
return bodyGenStatus;
}
diff --git a/mlir/test/Target/LLVMIR/openmp-omp.private-dealloc.mlir b/mlir/test/Target/LLVMIR/openmp-omp.private-dealloc.mlir
new file mode 100644
index 00000000000000..835caccb262c46
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-omp.private-dealloc.mlir
@@ -0,0 +1,53 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+llvm.func @free(!llvm.ptr)
+
+llvm.func @parallel_op_dealloc(%arg0: !llvm.ptr) {
+ omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
+ %0 = llvm.load %arg2 : !llvm.ptr -> f32
+ omp.terminator
+ }
+ llvm.return
+}
+
+omp.private {type = firstprivate} @x.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %c1 = llvm.mlir.constant(1 : i32) : i32
+ %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
+ omp.yield(%0 : !llvm.ptr)
+} copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.load %arg0 : !llvm.ptr -> f32
+ llvm.store %0, %arg1 : f32, !llvm.ptr
+ omp.yield(%arg1 : !llvm.ptr)
+} dealloc {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.ptrtoint %arg0 : !llvm.ptr to i64
+ %c0 = llvm.mlir.constant(0 : i64) : i64
+ %1 = llvm.icmp "ne" %0, %c0 : i64
+ llvm.cond_br %1, ^bb1, ^bb2
+
+^bb1:
+ llvm.call @free(%arg0) : (!llvm.ptr) -> ()
+ llvm.br ^bb2
+
+^bb2:
+ omp.yield
+}
+
+// CHECK-LABEL: define internal void @parallel_op_dealloc..omp_par
+// CHECK: %[[LOCAL_ALLOC:.*]] = alloca float, align 4
+
+// CHECK: omp.par.pre_finalize:
+// CHECK: br label %[[DEALLOC_REG_START:.*]]
+
+// CHECK: [[DEALLOC_REG_START]]:
+// CHECK: %[[LOCAL_ALLOC_CONV:.*]] = ptrtoint ptr %[[LOCAL_ALLOC]] to i64
+// CHECK: %[[COND:.*]] = icmp ne i64 %[[LOCAL_ALLOC_CONV]], 0
+// CHECK: br i1 %[[COND]], label %[[DEALLOC_REG_BB1:.*]], label %[[DEALLOC_REG_BB2:.*]]
+
+// CHECK: [[DEALLOC_REG_BB2]]:
+
+// CHECK: [[DEALLOC_REG_BB1]]:
+// CHECK-NEXT: call void @free(ptr %[[LOCAL_ALLOC]])
+// CHECK-NEXT: br label %[[DEALLOC_REG_BB2]]
More information about the Mlir-commits
mailing list