[Mlir-commits] [llvm] [mlir] [mlir][OpenMP] - Implement lowering from MLIR to LLVMIR for `private` clause on `target` constructs (PR #105471)

Pranav Bhandarkar llvmlistbot at llvm.org
Mon Aug 26 05:40:54 PDT 2024


https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/105471

>From 9f28204b6ab27f32105ed79a4300a116966557dc Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 14 Aug 2024 11:25:31 -0500
Subject: [PATCH 1/4] [mlir][OpenMP] - Implement lowering from MLIR to LLVMIR
 for `private` clause on `target` constructs

This patch contains support to lower the `private` clause when used while offloading.
We implement this by inlining the `alloc` region of `omp.private` at the start
of the `target` region and adjusting the uses so that the values yielded from the `alloc`
region are used in the `target` region.

At this point, we do not handle the following (the first of the two below is being worked on)
- allocatables because we do not handle `dealloc` regions in this patch.
- firstprivate because there isn't support to lower firstprivate to mlir yet.
---
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |   7 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 132 +++++++++++++++++-
 mlir/test/Dialect/OpenMP/invalid.mlir         |   2 +-
 .../Target/LLVMIR/openmp-target-private.mlir  |  73 ++++++++++
 .../offloading/fortran/target-private-map.f90 |  41 ++++++
 5 files changed, 252 insertions(+), 3 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-target-private.mlir
 create mode 100644 offload/test/offloading/fortran/target-private-map.f90

diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 11780f84697b15..31d97313aedb7a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2494,7 +2494,12 @@ LogicalResult PrivateClauseOp::verify() {
              << "Did not expect any values to be yielded.";
     }
 
-    if (yieldedTypes.size() == 1 && yieldedTypes.front() == symType)
+    if (yieldedTypes.size() != 1)
+      return mlir::emitError(terminator->getLoc())
+             << "Expected exactly 1 yielded value, but got "
+             << yieldedTypes.size();
+
+    if (yieldedTypes.front() == symType)
       return success();
 
     auto error = mlir::emitError(yieldOp.getLoc())
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 458d05d5059db7..af48b5379140c4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
 #include "mlir/Analysis/TopologicalSortUtils.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPClauseOperands.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
 #include "mlir/IR/IRMapping.h"
@@ -1480,6 +1481,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
               counter);
 
           clone.setSymName(cloneName);
+
           return {privVar, clone};
         }
       }
@@ -3241,12 +3243,140 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
   SmallVector<Value> mapVars = targetOp.getMapVars();
   llvm::Function *llvmOutlinedFn = nullptr;
-
   // TODO: It can also be false if a compile-time constant `false` IF clause is
   // specified.
   bool isOffloadEntry =
       isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
 
+  if (!targetOp.getPrivateVars().empty()) {
+    auto privateVars = targetOp.getPrivateVars();
+    auto privateSyms = targetOp.getPrivateSyms();
+    auto &firstTargetRegion = opInst.getRegion(0);
+    auto &firstTargetBlock = firstTargetRegion.front();
+    auto *regionArgsStart = firstTargetBlock.getArguments().begin();
+    auto *privArgsStart = regionArgsStart + targetOp.getMapVars().size();
+    auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size();
+    BitVector blockArgsBV(firstTargetBlock.getNumArguments(), false);
+    struct omp::PrivateClauseOps newPrivateClauses;
+    MutableArrayRef argSubRangePrivates(privArgsStart, privArgsEnd);
+    for (auto [privVar, privatizerNameAttr, blockArg] :
+         llvm::zip_equal(privateVars, *privateSyms, argSubRangePrivates)) {
+
+      SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr);
+
+      // 1. Clone the privatizer so that we can make changes to it freely
+      MLIRContext &context = moduleTranslation.getContext();
+      mlir::IRRewriter rewriter(&context);
+      omp::PrivateClauseOp privatizer =
+          SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(&opInst,
+                                                                     privSym);
+      // Handle only private for now. Also, not handling allocatables yet.
+      if (privatizer.getDataSharingType() !=
+              omp::DataSharingClauseType::Private ||
+          !privatizer.getDeallocRegion().empty()) {
+        newPrivateClauses.privateSyms.push_back(privSym);
+        newPrivateClauses.privateVars.push_back(privVar);
+        continue;
+      }
+
+      auto &allocRegion = privatizer.getAllocRegion();
+      // `alloc` region should have only 1 argument
+      assert(allocRegion.getNumArguments() == 1 &&
+             "alloc region of omp::PrivateClauseOp should have one argument");
+      auto allocRegionArg = allocRegion.getArgument(0);
+      // Assuming we have the following targetOp and Privatizer
+      //
+      // omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
+      //   %1 = alloca...
+      //   omp.yield(%1)
+      // }
+      // omp.target map(..) map(..) private(privVar) {
+      //  ^bb0(map_arg0, ..., map_argn, priv_arg0):
+      //    ...
+      //    ...
+      //   store %v, %priv_arg0
+      //   omp.terminator
+      // }
+      // Roughly, we do the following -
+      // Split the first block (bb0) of the first region of the target into
+      // two block. Then clone the alloc region of the privatizer between the
+      // two new blocks. When cloning replace the alloc argument with privVar.
+      // We'll then have
+      //
+      // omp.target map(..) map(..) private(privVar) {
+      //  ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn):
+      //  ^bb1:  (cloned region)  // no predecessor
+      //   %1 = alloca...
+      //   omp.yield(%1)
+      //  ^bb2:                   // no predecessor
+      //    ...
+      //    ...
+      //   store %v, %priv_arg0
+      //   omp.terminator
+      // }
+      // Next, add an unconditional branch from ^bb0 to ^bb1 and change the
+      // yield in the last block of the cloned alloc region to an unconditional
+      // branch before replacing all uses of 'priv_arg0' with the yielded value
+      // to finally get the following
+      //
+      // omp.target map(..) map(..) private(privVar) {
+      //  ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn):
+      //   llvm.br ^bb1
+      //  ^bb1:  (cloned region) // pred: ^bb0
+      //   %1 = alloca...
+      //   llvm.br ^bb2
+      //  ^bb2:                  // pred: ^bb1
+      //    ...
+      //    ...
+      //   store %v, %1          // %priv_arg0 replaced with the yield value
+      //   omp.terminator
+      // }
+      Block &firstBlock = firstTargetRegion.front();
+      Block *newBlock = rewriter.splitBlock(&firstBlock, firstBlock.begin());
+      rewriter.setInsertionPointToStart(&firstBlock);
+      Location loc = targetOp.getLoc();
+      rewriter.create<LLVM::BrOp>(loc, ValueRange(), newBlock);
+
+      IRMapping cloneMap;
+      cloneMap.map(allocRegionArg, privVar);
+      auto secondBlockIter = std::next(firstTargetRegion.begin(), 1);
+      allocRegion.cloneInto(&firstTargetRegion, secondBlockIter, cloneMap);
+
+      unsigned allocRegNumBlocks = allocRegion.getBlocks().size();
+      secondBlockIter = std::next(firstTargetRegion.begin(), 1);
+      auto clonedAllocRegionEndIter =
+          std::next(secondBlockIter, (allocRegNumBlocks - 1));
+      Block &clonedAllocRegEndBlock = *clonedAllocRegionEndIter;
+
+      Operation *br = firstBlock.getTerminator();
+      LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(br);
+      Operation *yield = clonedAllocRegEndBlock.getTerminator();
+      omp::YieldOp yieldOp = dyn_cast<omp::YieldOp>(yield);
+      auto newValue = yieldOp.getResults().front();
+      rewriter.setInsertionPointAfter(yield);
+      auto *oldSucc = brOp.getSuccessor();
+      brOp.setSuccessor(&*secondBlockIter);
+      // TODO:Consider cloning brOp and adding it to clonedAllocRegEngBlock
+      // TODO: Can we not simply merge clonedAllocRegEngBlock and oldSucc?
+      rewriter.create<LLVM::BrOp>(loc, ValueRange(), oldSucc);
+      blockArgsBV.set(blockArg.getArgNumber());
+      rewriter.replaceAllUsesWith(blockArg, newValue);
+      rewriter.eraseOp(yield);
+    }
+    // Do some fix ups now.
+    // First, remove the blockArguments that we just privatized
+    firstTargetBlock.eraseArguments(blockArgsBV);
+    // Then remove the private vars and privatizers that we have
+    // processed i.e privatized just now.
+    if (newPrivateClauses.privateSyms.empty()) {
+      targetOp.getPrivateVarsMutable().clear();
+      targetOp.removePrivateSymsAttr();
+    } else {
+      targetOp.setPrivateSymsAttr(mlir::ArrayAttr::get(
+          targetOp.getContext(), newPrivateClauses.privateSyms));
+      targetOp.getPrivateVarsMutable().assign(newPrivateClauses.privateVars);
+    }
+  }
   LogicalResult bodyGenStatus = success();
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
   auto bodyCB = [&](InsertPointTy allocaIP,
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 1d1d93f0977588..e3267c520490a2 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -2219,7 +2219,7 @@ omp.private {type = private} @x.privatizer : i32 alloc {
 
 omp.private {type = private} @x.privatizer : i32 alloc {
 ^bb0(%arg0: i32):
-  // expected-error @below {{Invalid yielded value. Expected type: 'i32', got: None}}
+  // expected-error @below {{Expected exactly 1 yielded value, but got 0}}
   omp.yield
 }
 
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
new file mode 100644
index 00000000000000..38c93b3f0568f8
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -0,0 +1,73 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr
+  omp.yield(%1 : !llvm.ptr)
+}
+llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %2 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(2 : i32) : i32
+  llvm.store %4, %3 : i32, !llvm.ptr
+  %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
+  omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) {
+  ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+    %6 = llvm.mlir.constant(10 : i32) : i32
+    %7 = llvm.load %arg0 : !llvm.ptr -> i32
+    %8 = llvm.add %7, %6 : i32
+    llvm.store %8, %arg1 : i32, !llvm.ptr
+    omp.terminator
+  }
+  llvm.return
+}
+// CHECK: define internal void @__omp_offloading_fd00
+// CHECK-DAG: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
+// CHECK-DAG: %[[ADD:.*]] = add i32 {{.*}}, 10
+// CHECK-DAG: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4
+
+omp.private {type = private} @n.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr
+  omp.yield(%1 : !llvm.ptr)
+}
+llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %2 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(1 : i64) : i64
+  %5 = llvm.alloca %4 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(2 : i32) : i32
+  llvm.store %6, %5 : i32, !llvm.ptr
+  %7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
+  omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) {
+  ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
+    %8 = llvm.mlir.constant(1.100000e+01 : f32) : f32
+    %9 = llvm.mlir.constant(10 : i32) : i32
+    %10 = llvm.load %arg0 : !llvm.ptr -> i32
+    %11 = llvm.add %10, %9 : i32
+    llvm.store %11, %arg1 : i32, !llvm.ptr
+    %12 = llvm.load %arg1 : !llvm.ptr -> i32
+    %13 = llvm.sitofp %12 : i32 to f32
+    %14 = llvm.fadd %13, %8  {fastmathFlags = #llvm.fastmath<contract>} : f32
+    llvm.store %14, %arg2 : f32, !llvm.ptr
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK: define internal void @__omp_offloading_fd00
+// CHECK-DAG: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4
+// CHECK-DAG: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4
+// CHECK-DAG: %[[ADD_I32:.*]] = add i32 {{.*}}, 10
+// CHECK-DAG: store i32 %[[ADD_I32]], ptr %[[PRIV_I32_ALLOC]], align 4
+// CHECK-DAG: %[[LOAD_I32_AGAIN:.*]] = load i32, ptr %6, align 4
+// CHECK-DAG: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float
+// CHECK-DAG: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01
+// CHECK-DAG: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4
+      
diff --git a/offload/test/offloading/fortran/target-private-map.f90 b/offload/test/offloading/fortran/target-private-map.f90
new file mode 100644
index 00000000000000..861a1886c03844
--- /dev/null
+++ b/offload/test/offloading/fortran/target-private-map.f90
@@ -0,0 +1,41 @@
+! Test target private
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+  integer :: a = 0
+  call target_private(a)
+  print*, "======= FORTRAN Test passed! ======="
+  print*, "foo(a) should not return 20, got " , a
+  if (a /= 20) then
+     stop 0
+  else
+     stop 1
+  end if
+  
+  !       stop 0
+end program main
+subroutine target_private(r)
+  implicit none
+  integer, dimension(2) :: simple_vars
+  integer :: a, r
+  ! set a to 10
+  a = 5
+  simple_vars(1) = a
+  simple_vars(2) = a
+  !$omp target map(tofrom: simple_vars) private(a)
+  ! Without private(a), a would be firstprivate, meaning it's value would be 5
+  ! with private(a), it's value would be uninitialized, which means it'd have
+  ! a very small chance of being 5.
+  ! So, simple_vars(1|2) should be 5 + a_private
+  simple_vars(1) = simple_vars(1) + a
+  simple_vars(2) = simple_vars(2) + a
+  !$omp end target
+  r = simple_vars(1) + simple_vars(2)
+end subroutine target_private

>From 08ff51e8b852a7d08dcfbed31e0f6fb907c22b52 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 20 Aug 2024 23:50:19 -0500
Subject: [PATCH 2/4] remove a new line added accidently

---
 .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp   | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index af48b5379140c4..d2f90615eb8e0f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1481,7 +1481,6 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
               counter);
 
           clone.setSymName(cloneName);
-
           return {privVar, clone};
         }
       }

>From 39a4db4c501ca44e0a67d88097a57ab2a2f25b99 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 21 Aug 2024 11:54:32 -0500
Subject: [PATCH 3/4] Address review comments

---
 .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp   | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d2f90615eb8e0f..1ea8b80e15711d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3248,15 +3248,15 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
       isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
 
   if (!targetOp.getPrivateVars().empty()) {
-    auto privateVars = targetOp.getPrivateVars();
-    auto privateSyms = targetOp.getPrivateSyms();
+    OperandRange privateVars = targetOp.getPrivateVars();
+    std::optional<mlir::ArrayAttr> privateSyms = targetOp.getPrivateSyms();
     auto &firstTargetRegion = opInst.getRegion(0);
     auto &firstTargetBlock = firstTargetRegion.front();
     auto *regionArgsStart = firstTargetBlock.getArguments().begin();
     auto *privArgsStart = regionArgsStart + targetOp.getMapVars().size();
     auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size();
     BitVector blockArgsBV(firstTargetBlock.getNumArguments(), false);
-    struct omp::PrivateClauseOps newPrivateClauses;
+    omp::PrivateClauseOps newPrivateClauses;
     MutableArrayRef argSubRangePrivates(privArgsStart, privArgsEnd);
     for (auto [privVar, privatizerNameAttr, blockArg] :
          llvm::zip_equal(privateVars, *privateSyms, argSubRangePrivates)) {
@@ -3330,9 +3330,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
       //   store %v, %1          // %priv_arg0 replaced with the yield value
       //   omp.terminator
       // }
-      Block &firstBlock = firstTargetRegion.front();
-      Block *newBlock = rewriter.splitBlock(&firstBlock, firstBlock.begin());
-      rewriter.setInsertionPointToStart(&firstBlock);
+      Block *newBlock =
+          rewriter.splitBlock(&firstTargetBlock, firstTargetBlock.begin());
+      rewriter.setInsertionPointToStart(&firstTargetBlock);
       Location loc = targetOp.getLoc();
       rewriter.create<LLVM::BrOp>(loc, ValueRange(), newBlock);
 
@@ -3347,7 +3347,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
           std::next(secondBlockIter, (allocRegNumBlocks - 1));
       Block &clonedAllocRegEndBlock = *clonedAllocRegionEndIter;
 
-      Operation *br = firstBlock.getTerminator();
+      Operation *br = firstTargetBlock.getTerminator();
       LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(br);
       Operation *yield = clonedAllocRegEndBlock.getTerminator();
       omp::YieldOp yieldOp = dyn_cast<omp::YieldOp>(yield);

>From 54ff624ddf69d3d107a4ba6cc29f49cd4a59d20f Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Mon, 26 Aug 2024 07:40:30 -0500
Subject: [PATCH 4/4] Address review comments

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 44 +++++++++----------
 .../Target/LLVMIR/openmp-target-private.mlir  |  7 ++-
 .../offloading/fortran/target-private-map.f90 |  2 +-
 3 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 1ea8b80e15711d..29ab6f987906fc 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3250,8 +3250,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   if (!targetOp.getPrivateVars().empty()) {
     OperandRange privateVars = targetOp.getPrivateVars();
     std::optional<mlir::ArrayAttr> privateSyms = targetOp.getPrivateSyms();
-    auto &firstTargetRegion = opInst.getRegion(0);
-    auto &firstTargetBlock = firstTargetRegion.front();
+    auto &targetRegion = opInst.getRegion(0);
+    auto &firstTargetBlock = targetRegion.front();
     auto *regionArgsStart = firstTargetBlock.getArguments().begin();
     auto *privArgsStart = regionArgsStart + targetOp.getMapVars().size();
     auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size();
@@ -3263,7 +3263,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
 
       SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr);
 
-      // 1. Clone the privatizer so that we can make changes to it freely
+      // 1. Look up the privatizer in the symbol table
       MLIRContext &context = moduleTranslation.getContext();
       mlir::IRRewriter rewriter(&context);
       omp::PrivateClauseOp privatizer =
@@ -3297,9 +3297,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
       //   omp.terminator
       // }
       // Roughly, we do the following -
-      // Split the first block (bb0) of the first region of the target into
-      // two block. Then clone the alloc region of the privatizer between the
-      // two new blocks. When cloning replace the alloc argument with privVar.
+      // Split the first block (bb0) of the target region into two blocks.
+      // Then, clone the alloc region of the privatizer between the two
+      // new blocks. When cloning replace the alloc argument with privVar.
       // We'll then have
       //
       // omp.target map(..) map(..) private(privVar) {
@@ -3315,8 +3315,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
       // }
       // Next, add an unconditional branch from ^bb0 to ^bb1 and change the
       // yield in the last block of the cloned alloc region to an unconditional
-      // branch before replacing all uses of 'priv_arg0' with the yielded value
-      // to finally get the following
+      // branch to the other split block before replacing all uses of
+      // 'priv_arg0' with the yielded value to finally get the following code
+      // structure
       //
       // omp.target map(..) map(..) private(privVar) {
       //  ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn):
@@ -3338,29 +3339,28 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
 
       IRMapping cloneMap;
       cloneMap.map(allocRegionArg, privVar);
-      auto secondBlockIter = std::next(firstTargetRegion.begin(), 1);
-      allocRegion.cloneInto(&firstTargetRegion, secondBlockIter, cloneMap);
+      auto newBlockIter = std::next(targetRegion.begin(), 1);
+      allocRegion.cloneInto(&targetRegion, newBlockIter, cloneMap);
 
       unsigned allocRegNumBlocks = allocRegion.getBlocks().size();
-      secondBlockIter = std::next(firstTargetRegion.begin(), 1);
-      auto clonedAllocRegionEndIter =
-          std::next(secondBlockIter, (allocRegNumBlocks - 1));
-      Block &clonedAllocRegEndBlock = *clonedAllocRegionEndIter;
-
-      Operation *br = firstTargetBlock.getTerminator();
-      LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(br);
-      Operation *yield = clonedAllocRegEndBlock.getTerminator();
-      omp::YieldOp yieldOp = dyn_cast<omp::YieldOp>(yield);
+      newBlockIter = std::next(targetRegion.begin(), 1);
+      auto clonedAllocRegionBackIter =
+          std::next(newBlockIter, (allocRegNumBlocks - 1));
+      Block &clonedAllocRegionLastBlock = *clonedAllocRegionBackIter;
+
+      LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(firstTargetBlock.getTerminator());
+      omp::YieldOp yieldOp =
+          dyn_cast<omp::YieldOp>(clonedAllocRegionLastBlock.getTerminator());
       auto newValue = yieldOp.getResults().front();
-      rewriter.setInsertionPointAfter(yield);
+      rewriter.setInsertionPointAfter(yieldOp);
       auto *oldSucc = brOp.getSuccessor();
-      brOp.setSuccessor(&*secondBlockIter);
+      brOp.setSuccessor(&*newBlockIter);
       // TODO:Consider cloning brOp and adding it to clonedAllocRegEngBlock
       // TODO: Can we not simply merge clonedAllocRegEngBlock and oldSucc?
       rewriter.create<LLVM::BrOp>(loc, ValueRange(), oldSucc);
       blockArgsBV.set(blockArg.getArgNumber());
       rewriter.replaceAllUsesWith(blockArg, newValue);
-      rewriter.eraseOp(yield);
+      rewriter.eraseOp(yieldOp);
     }
     // Do some fix ups now.
     // First, remove the blockArguments that we just privatized
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
index 38c93b3f0568f8..97da4c832ca8a8 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -25,6 +25,7 @@ llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarge
   llvm.return
 }
 // CHECK: define internal void @__omp_offloading_fd00
+// CHECK-NOT: define {{.*}}
 // CHECK-DAG: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
 // CHECK-DAG: %[[ADD:.*]] = add i32 {{.*}}, 10
 // CHECK-DAG: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4
@@ -38,10 +39,8 @@ omp.private {type = private} @n.privatizer : !llvm.ptr alloc {
 llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} {
   %0 = llvm.mlir.constant(1 : i64) : i64
   %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
-  %2 = llvm.mlir.constant(1 : i64) : i64
-  %3 = llvm.alloca %2 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
-  %4 = llvm.mlir.constant(1 : i64) : i64
-  %5 = llvm.alloca %4 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
+  %5 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
   %6 = llvm.mlir.constant(2 : i32) : i32
   llvm.store %6, %5 : i32, !llvm.ptr
   %7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
diff --git a/offload/test/offloading/fortran/target-private-map.f90 b/offload/test/offloading/fortran/target-private-map.f90
index 861a1886c03844..dfe29f33aae9d0 100644
--- a/offload/test/offloading/fortran/target-private-map.f90
+++ b/offload/test/offloading/fortran/target-private-map.f90
@@ -25,7 +25,7 @@ subroutine target_private(r)
   implicit none
   integer, dimension(2) :: simple_vars
   integer :: a, r
-  ! set a to 10
+  ! set a to 5
   a = 5
   simple_vars(1) = a
   simple_vars(2) = a



More information about the Mlir-commits mailing list