[Mlir-commits] [mlir] [MLIR][OpenMP] Extend omp.private materialization support: `firstprivate` (PR #82164)
Kareem Ergawy
llvmlistbot at llvm.org
Tue Feb 27 20:21:06 PST 2024
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/82164
>From 5c702d36dc35fd6ec2492740e05d7f28db08f9b8 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Sun, 18 Feb 2024 06:50:38 -0600
Subject: [PATCH] [MLIR][OpenMP] Extend `omp.private` materialization support:
`firstprivate`
Extends current support for delayed privatization during translation to
LLVM IR. This adds support for one-block `firstprivate` `omp.private`
ops.
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 35 ++++--
.../Target/LLVMIR/openmp-firstprivate.mlir | 116 ++++++++++++++++++
2 files changed, 144 insertions(+), 7 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8c20689c4a39dd..4e921d790ce48c 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1176,17 +1176,38 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
}();
if (privVar) {
+ Region &allocRegion = privatizerClone.getAllocRegion();
+
+ // If this is a `firstprivate` clause, prepare the `omp.private` op by:
if (privatizerClone.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate) {
- privatizerClone.emitOpError(
- "TODO: delayed privatization is not "
- "supported for `firstprivate` clauses yet.");
- bodyGenStatus = failure();
- return codeGenIP;
+ auto oldAllocBackBlock = std::prev(allocRegion.end());
+ omp::YieldOp oldAllocYieldOp =
+ llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator());
+
+ Region &copyRegion = privatizerClone.getCopyRegion();
+
+ mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext());
+ // 1. Cloning the `copy` region to the end of the `alloc` region.
+ copyCloneBuilder.cloneRegionBefore(copyRegion, allocRegion,
+ allocRegion.end());
+
+ auto newCopyRegionFrontBlock = std::next(oldAllocBackBlock);
+ // 2. Merging the last `alloc` block with the first block in the `copy`
+ // region clone.
+ // 3. Re-mapping the first argument of the `copy` region to be the
+ // argument of the `alloc` region and the second argument of the `copy`
+ // region to be the yielded value of the `alloc` region (this is the
+ // private clone of the privatized value).
+ copyCloneBuilder.mergeBlocks(
+ &*newCopyRegionFrontBlock, &*oldAllocBackBlock,
+ {allocRegion.getArgument(0), oldAllocYieldOp.getOperand(0)});
+
+ // 4. The old terminator of the `alloc` region is not needed anymore, so
+ // delete it.
+ oldAllocYieldOp.erase();
}
- Region &allocRegion = privatizerClone.getAllocRegion();
-
// Replace the privatizer block argument with mlir value being privatized.
// This way, the body of the privatizer will be changed from using the
// region/block argument to the value being privatized.
diff --git a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
new file mode 100644
index 00000000000000..65ae98b2a74c6e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
@@ -0,0 +1,116 @@
+// Test code-gen for `omp.parallel` ops with delayed privatizers (i.e. using
+// `omp.private` ops).
+
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+llvm.func @parallel_op_firstprivate(%arg0: !llvm.ptr) {
+ omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
+ %0 = llvm.load %arg2 : !llvm.ptr -> f32
+ omp.terminator
+ }
+ llvm.return
+}
+
+omp.private {type = firstprivate} @x.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %c1 = llvm.mlir.constant(1 : i32) : i32
+ %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
+ omp.yield(%0 : !llvm.ptr)
+} copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.load %arg0 : !llvm.ptr -> f32
+ llvm.store %0, %arg1 : f32, !llvm.ptr
+ omp.yield(%arg1 : !llvm.ptr)
+}
+
+// CHECK-LABEL: @parallel_op_firstprivate
+// CHECK-SAME: (ptr %[[ORIG:.*]]) {
+// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr }, align 8
+// CHECK: %[[ORIG_GEP:.*]] = getelementptr { ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
+// CHECK: store ptr %[[ORIG]], ptr %[[ORIG_GEP]], align 8
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_firstprivate..omp_par, ptr %[[OMP_PAR_ARG]])
+// CHECK: }
+
+// CHECK-LABEL: void @parallel_op_firstprivate..omp_par
+// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
+// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
+
+// Check that the privatizer alloc region was inlined properly.
+// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
+
+// Check that the privatizer copy region was inlined properly.
+
+// CHECK: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
+// CHECK: store float %[[ORIG_VAL]], ptr %[[PRIV_ALLOC]], align 4
+// CHECK-NEXT: br
+
+// Check that the privatized value is used (rather than the original one).
+// CHECK: load float, ptr %[[PRIV_ALLOC]], align 4
+// CHECK: }
+
+// -----
+
+llvm.func @parallel_op_firstprivate_multi_block(%arg0: !llvm.ptr) {
+ omp.parallel private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) {
+ %0 = llvm.load %arg2 : !llvm.ptr -> f32
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK-LABEL: define internal void @parallel_op_firstprivate_multi_block..omp_par
+// CHECK: omp.par.entry:
+// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %{{.*}}, i32 0, i32 0
+// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
+// CHECK: br label %[[PRIV_BB1:.*]]
+
+// CHECK: [[PRIV_BB1]]:
+// The 1st `alloc` block directly branches to the 2nd `alloc` block since the
+// only instruction is `llvm.mlir.constant` which gets translated to compile-time
+// constant in LLVM IR.
+// CHECK-NEXT: br label %[[PRIV_BB2:.*]]
+
+// CHECK: [[PRIV_BB2]]:
+// CHECK-NEXT: %[[C1:.*]] = phi i32 [ 1, %[[PRIV_BB1]] ]
+// CHECK-NEXT: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[C1]], align 4
+// The entry block of the `copy` region is merged into the exit block of the
+// `alloc` region. So check for that.
+// CHECK-NEXT: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
+// CHECK-NEXT: br label %[[PRIV_BB3:.*]]
+
+// Check contents of the 2nd block in the `copy` region.
+// CHECK: [[PRIV_BB3]]:
+// CHECK-NEXT: %[[ORIG_VAL2:.*]] = phi float [ %[[ORIG_VAL]], %[[PRIV_BB2]] ]
+// CHECK-NEXT: %[[PRIV_ALLOC2:.*]] = phi ptr [ %[[PRIV_ALLOC]], %[[PRIV_BB2]] ]
+// CHECK-NEXT: store float %[[ORIG_VAL2]], ptr %[[PRIV_ALLOC2]], align 4
+// CHECK-NEXT: br label %[[PRIV_CONT:.*]]
+
+// Check that the privatizer's continuation block yields the private clone's
+// address.
+// CHECK: [[PRIV_CONT]]:
+// CHECK-NEXT: %[[PRIV_ALLOC3:.*]] = phi ptr [ %[[PRIV_ALLOC2]], %[[PRIV_BB3]] ]
+// CHECK-NEXT: br label %[[PAR_REG:.*]]
+
+// Check that the body of the parallel region loads from the private clone.
+// CHECK: [[PAR_REG]]:
+// CHECK: %{{.*}} = load float, ptr %[[PRIV_ALLOC3]], align 4
+
+omp.private {type = firstprivate} @multi_block.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %c1 = llvm.mlir.constant(1 : i32) : i32
+ llvm.br ^bb1(%c1 : i32)
+
+^bb1(%arg1: i32):
+ %0 = llvm.alloca %arg1 x f32 : (i32) -> !llvm.ptr
+ omp.yield(%0 : !llvm.ptr)
+
+} copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.load %arg0 : !llvm.ptr -> f32
+ llvm.br ^bb1(%0, %arg1 : f32, !llvm.ptr)
+
+^bb1(%arg2: f32, %arg3: !llvm.ptr):
+ llvm.store %arg2, %arg3 : f32, !llvm.ptr
+ omp.yield(%arg3 : !llvm.ptr)
+}
More information about the Mlir-commits
mailing list