[Mlir-commits] [mlir] [MLIR][OpenMP] Extend `omp.private` materialization support: `firstprivate` (PR #83761)

Kareem Ergawy llvmlistbot at llvm.org
Sun Mar 3 20:31:52 PST 2024


https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/83761

Extends the current support for delayed privatization during translation to LLVM IR by adding support for one-block `firstprivate` `omp.private` ops.
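For reference, the kind of privatizer this patch enables is shaped like the following (a minimal sketch mirroring the test added in this patch; the `@x.privatizer` name and the `f32` payload are illustrative, and the block arguments are renamed for readability):

  omp.private {type = firstprivate} @x.privatizer : !llvm.ptr alloc {
  ^bb0(%arg0: !llvm.ptr):
    // Allocate the thread-private copy.
    %c1 = llvm.mlir.constant(1 : i32) : i32
    %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
    omp.yield(%0 : !llvm.ptr)
  } copy {
  ^bb0(%orig: !llvm.ptr, %priv: !llvm.ptr):
    // Initialize the private copy from the original variable.
    %1 = llvm.load %orig : !llvm.ptr -> f32
    llvm.store %1, %priv : f32, !llvm.ptr
    omp.yield(%priv : !llvm.ptr)
  }

During translation, the `copy` region is cloned to the end of the `alloc` region and the two are merged: the `copy` region's block arguments are remapped to the `alloc` region's argument and its yielded value (the private clone), and the old `alloc` terminator is erased. The merged region is then inlined as before.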

From 1ee197ed213ef22dd49f368c5dcb6fae63e6c293 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Sun, 18 Feb 2024 06:50:38 -0600
Subject: [PATCH] [MLIR][OpenMP] Extend `omp.private` materialization support:
 `firstprivate`

Extends current support for delayed privatization during translation to
LLVM IR. This adds support for one-block `firstprivate` `omp.private`
ops.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  35 ++++--
 .../Target/LLVMIR/openmp-firstprivate.mlir    | 116 ++++++++++++++++++
 2 files changed, 144 insertions(+), 7 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-firstprivate.mlir

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index fd1de274da60e8..bef227f2c583ed 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1190,17 +1190,38 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
     }();
 
     if (privVar) {
+      Region &allocRegion = privatizerClone.getAllocRegion();
+
+      // If this is a `firstprivate` clause, prepare the `omp.private` op by:
       if (privatizerClone.getDataSharingType() ==
           omp::DataSharingClauseType::FirstPrivate) {
-        privatizerClone.emitOpError(
-            "TODO: delayed privatization is not "
-            "supported for `firstprivate` clauses yet.");
-        bodyGenStatus = failure();
-        return codeGenIP;
+        auto oldAllocBackBlock = std::prev(allocRegion.end());
+        omp::YieldOp oldAllocYieldOp =
+            llvm::cast<omp::YieldOp>(oldAllocBackBlock->getTerminator());
+
+        Region &copyRegion = privatizerClone.getCopyRegion();
+
+        mlir::IRRewriter copyCloneBuilder(&moduleTranslation.getContext());
+        // 1. Cloning the `copy` region to the end of the `alloc` region.
+        copyCloneBuilder.cloneRegionBefore(copyRegion, allocRegion,
+                                           allocRegion.end());
+
+        auto newCopyRegionFrontBlock = std::next(oldAllocBackBlock);
+        // 2. Merging the last `alloc` block with the first block in the `copy`
+        // region clone.
+        // 3. Re-mapping the first argument of the `copy` region to be the
+        // argument of the `alloc` region and the second argument of the `copy`
+        // region to be the yielded value of the `alloc` region (this is the
+        // private clone of the privatized value).
+        copyCloneBuilder.mergeBlocks(
+            &*newCopyRegionFrontBlock, &*oldAllocBackBlock,
+            {allocRegion.getArgument(0), oldAllocYieldOp.getOperand(0)});
+
+        // 4. The old terminator of the `alloc` region is not needed anymore, so
+        // delete it.
+        oldAllocYieldOp.erase();
       }
 
-      Region &allocRegion = privatizerClone.getAllocRegion();
-
       // Replace the privatizer block argument with mlir value being privatized.
       // This way, the body of the privatizer will be changed from using the
       // region/block argument to the value being privatized.
diff --git a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
new file mode 100644
index 00000000000000..65ae98b2a74c6e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
@@ -0,0 +1,116 @@
+// Test code-gen for `omp.parallel` ops with delayed privatizers (i.e. using
+// `omp.private` ops).
+
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+llvm.func @parallel_op_firstprivate(%arg0: !llvm.ptr) {
+  omp.parallel private(@x.privatizer %arg0 -> %arg2 : !llvm.ptr) {
+    %0 = llvm.load %arg2 : !llvm.ptr -> f32
+    omp.terminator
+  }
+  llvm.return
+}
+
+omp.private {type = firstprivate} @x.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %c1 = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
+  omp.yield(%0 : !llvm.ptr)
+} copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> f32
+  llvm.store %0, %arg1 : f32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+// CHECK-LABEL: @parallel_op_firstprivate
+// CHECK-SAME: (ptr %[[ORIG:.*]]) {
+// CHECK: %[[OMP_PAR_ARG:.*]] = alloca { ptr }, align 8
+// CHECK: %[[ORIG_GEP:.*]] = getelementptr { ptr }, ptr %[[OMP_PAR_ARG]], i32 0, i32 0
+// CHECK: store ptr %[[ORIG]], ptr %[[ORIG_GEP]], align 8
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @parallel_op_firstprivate..omp_par, ptr %[[OMP_PAR_ARG]])
+// CHECK: }
+
+// CHECK-LABEL: void @parallel_op_firstprivate..omp_par
+// CHECK-SAME: (ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %[[ARG:.*]])
+// CHECK: %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[ARG]], i32 0, i32 0
+// CHECK: %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
+
+// Check that the privatizer alloc region was inlined properly.
+// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, align 4
+
+// Check that the privatizer copy region was inlined properly.
+
+// CHECK: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
+// CHECK: store float %[[ORIG_VAL]], ptr %[[PRIV_ALLOC]], align 4
+// CHECK-NEXT: br
+
+// Check that the privatized value is used (rather than the original one).
+// CHECK: load float, ptr %[[PRIV_ALLOC]], align 4
+// CHECK: }
+
+// -----
+
+llvm.func @parallel_op_firstprivate_multi_block(%arg0: !llvm.ptr) {
+  omp.parallel private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) {
+    %0 = llvm.load %arg2 : !llvm.ptr -> f32
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define internal void @parallel_op_firstprivate_multi_block..omp_par
+// CHECK: omp.par.entry:
+// CHECK:  %[[ORIG_PTR_PTR:.*]] = getelementptr { ptr }, ptr %{{.*}}, i32 0, i32 0
+// CHECK:  %[[ORIG_PTR:.*]] = load ptr, ptr %[[ORIG_PTR_PTR]], align 8
+// CHECK:   br label %[[PRIV_BB1:.*]]
+
+// CHECK: [[PRIV_BB1]]:
+// The 1st `alloc` block directly branches to the 2nd `alloc` block since its
+// only instruction is `llvm.mlir.constant`, which gets translated to a
+// compile-time constant in LLVM IR.
+// CHECK-NEXT: br label %[[PRIV_BB2:.*]]
+
+// CHECK: [[PRIV_BB2]]:
+// CHECK-NEXT: %[[C1:.*]] = phi i32 [ 1, %[[PRIV_BB1]] ]
+// CHECK-NEXT: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[C1]], align 4
+// The entry block of the `copy` region is merged into the exit block of the
+// `alloc` region. So check for that.
+// CHECK-NEXT: %[[ORIG_VAL:.*]] = load float, ptr %[[ORIG_PTR]], align 4
+// CHECK-NEXT: br label %[[PRIV_BB3:.*]]
+
+// Check contents of the 2nd block in the `copy` region.
+// CHECK: [[PRIV_BB3]]:
+// CHECK-NEXT: %[[ORIG_VAL2:.*]] = phi float [ %[[ORIG_VAL]], %[[PRIV_BB2]] ]
+// CHECK-NEXT: %[[PRIV_ALLOC2:.*]] = phi ptr [ %[[PRIV_ALLOC]], %[[PRIV_BB2]] ]
+// CHECK-NEXT: store float %[[ORIG_VAL2]], ptr %[[PRIV_ALLOC2]], align 4
+// CHECK-NEXT: br label %[[PRIV_CONT:.*]]
+
+// Check that the privatizer's continuation block yields the private clone's
+// address.
+// CHECK: [[PRIV_CONT]]:
+// CHECK-NEXT:   %[[PRIV_ALLOC3:.*]] = phi ptr [ %[[PRIV_ALLOC2]], %[[PRIV_BB3]] ]
+// CHECK-NEXT:   br label %[[PAR_REG:.*]]
+
+// Check that the body of the parallel region loads from the private clone.
+// CHECK: [[PAR_REG]]:
+// CHECK:        %{{.*}} = load float, ptr %[[PRIV_ALLOC3]], align 4
+
+omp.private {type = firstprivate} @multi_block.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %c1 = llvm.mlir.constant(1 : i32) : i32
+  llvm.br ^bb1(%c1 : i32)
+
+^bb1(%arg1: i32):
+  %0 = llvm.alloca %arg1 x f32 : (i32) -> !llvm.ptr
+  omp.yield(%0 : !llvm.ptr)
+
+} copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> f32
+  llvm.br ^bb1(%0, %arg1 : f32, !llvm.ptr)
+
+^bb1(%arg2: f32, %arg3: !llvm.ptr):
+  llvm.store %arg2, %arg3 : f32, !llvm.ptr
+  omp.yield(%arg3 : !llvm.ptr)
+}


