[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP] Pack task private variables into a heap-allocated context struct (PR #125307)

Tom Eccles via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Feb 5 07:50:25 PST 2025


https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/125307

>From afa9026eefb6c9cd613ed021a92e159f93c3667c Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Fri, 24 Jan 2025 17:32:41 +0000
Subject: [PATCH 1/8] [mlir][OpenMP] Pack task private variables into a
 heap-allocated context struct

See RFC:
https://discourse.llvm.org/t/rfc-openmp-supporting-delayed-task-execution-with-firstprivate-variables/83084

The aim here is to ensure that tasks which are not executed for a while
after they are created do not try to reference any data which are now
out of scope. This is done by packing the data referred to by the task
into a heap allocated structure (freed at the end of the task).

I decided to create the task context structure in
OpenMPToLLVMIRTranslation instead of adapting how it is done
CodeExtractor (via OpenMPIRBuilder] because CodeExtractor is (at least
in theory) generic code which could have other unrelated uses.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 204 +++++++++++++++---
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |   5 +-
 .../LLVMIR/openmp-task-privatization.mlir     |  82 +++++++
 3 files changed, 254 insertions(+), 37 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-task-privatization.mlir

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8a9a69cefad8ee..5c4deab492c839 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
 #include "mlir/Analysis/TopologicalSortUtils.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
 #include "mlir/IR/IRMapping.h"
@@ -24,10 +25,12 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/ReplaceConstant.h"
 #include "llvm/Support/FileSystem.h"
@@ -1331,19 +1334,16 @@ findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
 
 /// Initialize a single (first)private variable. You probably want to use
 /// allocateAndInitPrivateVars instead of this.
-static llvm::Error
-initPrivateVar(llvm::IRBuilderBase &builder,
-               LLVM::ModuleTranslation &moduleTranslation,
-               omp::PrivateClauseOp &privDecl, Value mlirPrivVar,
-               BlockArgument &blockArg, llvm::Value *llvmPrivateVar,
-               llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
-               llvm::BasicBlock *privInitBlock,
-               llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
+/// This returns the private variable which has been initialized. This
+/// variable should be mapped before constructing the body of the Op.
+static llvm::Expected<llvm::Value *> initPrivateVar(
+    llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
+    omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
+    llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
+    llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
   Region &initRegion = privDecl.getInitRegion();
   if (initRegion.empty()) {
-    moduleTranslation.mapValue(blockArg, llvmPrivateVar);
-    llvmPrivateVars.push_back(llvmPrivateVar);
-    return llvm::Error::success();
+    return llvmPrivateVar;
   }
 
   // map initialization region block arguments
@@ -1363,17 +1363,15 @@ initPrivateVar(llvm::IRBuilderBase &builder,
 
   assert(phis.size() == 1 && "expected one allocation to be yielded");
 
-  // prefer the value yielded from the init region to the allocated private
-  // variable in case the region is operating on arguments by-value (e.g.
-  // Fortran character boxes).
-  moduleTranslation.mapValue(blockArg, phis[0]);
-  llvmPrivateVars.push_back(phis[0]);
-
   // clear init region block argument mapping in case it needs to be
   // re-created with a different source for another use of the same
   // reduction decl
   moduleTranslation.forgetMapping(initRegion);
-  return llvm::Error::success();
+
+  // Prefer the value yielded from the init region to the allocated private
+  // variable in case the region is operating on arguments by-value (e.g.
+  // Fortran character boxes).
+  return phis[0];
 }
 
 /// Allocate and initialize delayed private variables. Returns the basic block
@@ -1415,11 +1413,13 @@ static llvm::Expected<llvm::BasicBlock *> allocateAndInitPrivateVars(
     llvm::Value *llvmPrivateVar = builder.CreateAlloca(
         llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
 
-    llvm::Error err = initPrivateVar(
+    llvm::Expected<llvm::Value *> privateVarOrError = initPrivateVar(
         builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
-        llvmPrivateVar, llvmPrivateVars, privInitBlock, mappedPrivateVars);
-    if (err)
+        llvmPrivateVar, privInitBlock, mappedPrivateVars);
+    if (auto err = privateVarOrError.takeError())
       return err;
+    llvmPrivateVars.push_back(privateVarOrError.get());
+    moduleTranslation.mapValue(blockArg, privateVarOrError.get());
   }
   return afterAllocas;
 }
@@ -1730,6 +1730,97 @@ buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
   }
 }
 
+namespace {
+/// TaskContextStructManager takes care of creating and freeing a structure
+/// containing information needed by the task body to execute.
+class TaskContextStructManager {
+public:
+  TaskContextStructManager(llvm::IRBuilderBase &builder,
+                           LLVM::ModuleTranslation &moduleTranslation)
+      : builder{builder}, moduleTranslation{moduleTranslation} {}
+
+  /// Creates a heap allocated struct containing space for each private
+  /// variable. Returns nullptr if there are is no struct needed. Invariant:
+  /// privateVarTypes, privateDecls, and the elements of the structure should
+  /// all have the same order.
+  void
+  generateTaskContextStruct(MutableArrayRef<omp::PrivateClauseOp> privateDecls);
+
+  /// Create GEPs to access each member of the structure representing a private
+  /// variable, adding them to llvmPrivateVars.
+  void createGEPsToPrivateVars(SmallVectorImpl<llvm::Value *> &llvmPrivateVars);
+
+  /// De-allocate the task context structure.
+  void freeStructPtr();
+
+  llvm::Value *getStructPtr() { return structPtr; }
+
+private:
+  llvm::IRBuilderBase &builder;
+  LLVM::ModuleTranslation &moduleTranslation;
+
+  /// The type of each member of the structure, in order.
+  SmallVector<llvm::Type *> privateVarTypes;
+
+  /// A pointer to the structure containing context for this task.
+  llvm::Value *structPtr = nullptr;
+  /// The type of the structure
+  llvm::Type *structTy = nullptr;
+};
+} // namespace
+
+void TaskContextStructManager::generateTaskContextStruct(
+    MutableArrayRef<omp::PrivateClauseOp> privateDecls) {
+  if (privateDecls.empty())
+    return;
+  privateVarTypes.reserve(privateDecls.size());
+
+  for (omp::PrivateClauseOp &privOp : privateDecls) {
+    Type mlirType = privOp.getType();
+    privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
+  }
+
+  structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
+                                   privateVarTypes);
+
+  llvm::DataLayout dataLayout =
+      builder.GetInsertBlock()->getModule()->getDataLayout();
+  llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout);
+  llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy);
+
+  // Heap allocate the structure
+  structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize,
+                                   /*ArraySize=*/nullptr, /*MallocF=*/nullptr,
+                                   "omp.task.context_ptr");
+}
+
+void TaskContextStructManager::createGEPsToPrivateVars(
+    SmallVectorImpl<llvm::Value *> &llvmPrivateVars) {
+  if (!structPtr) {
+    assert(privateVarTypes.empty());
+    return;
+  }
+
+  // Create GEPs for each struct member and initialize llvmPrivateVars to point
+  llvmPrivateVars.reserve(privateVarTypes.size());
+  llvm::Value *zero = builder.getInt32(0);
+  for (auto [i, eleTy] : llvm::enumerate(privateVarTypes)) {
+    llvm::Value *iVal = builder.getInt32(i);
+    llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
+    llvmPrivateVars.push_back(gep);
+  }
+}
+
+void TaskContextStructManager::freeStructPtr() {
+  if (!structPtr)
+    return;
+
+  llvm::IRBuilderBase::InsertPointGuard guard{builder};
+  // Ensure we don't put the call to free() after the terminator
+  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+  builder.CreateFree(structPtr);
+}
+
 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -1744,6 +1835,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   SmallVector<mlir::Value> mlirPrivateVars;
   SmallVector<llvm::Value *> llvmPrivateVars;
   SmallVector<omp::PrivateClauseOp> privateDecls;
+  TaskContextStructManager taskStructMgr{builder, moduleTranslation};
   mlirPrivateVars.reserve(privateBlockArgs.size());
   llvmPrivateVars.reserve(privateBlockArgs.size());
   collectPrivatizationDecls(taskOp, privateDecls);
@@ -1796,27 +1888,50 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   // Allocate and initialize private variables
   // TODO: package private variables up in a structure
   builder.SetInsertPoint(initBlock->getTerminator());
-  for (auto [privDecl, mlirPrivVar, blockArg] :
-       llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
-    llvm::Type *llvmAllocType =
-        moduleTranslation.convertType(privDecl.getType());
 
-    // Allocations:
-    builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
-    llvm::Value *llvmPrivateVar = builder.CreateAlloca(
-        llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
-
-    // builder.SetInsertPoint(initBlock->getTerminator());
-    auto err =
+  // Create task variable structure
+  llvm::SmallVector<llvm::Value *> privateVarAllocations;
+  taskStructMgr.generateTaskContextStruct(privateDecls);
+  // GEPs so that we can initialize the variables. Don't use these GEPs inside
+  // of the body otherwise it will be the GEP not the struct which is fowarded
+  // to the outlined function. GEPs forwarded in this way are passed in a
+  // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
+  // which may not be executed until after the current stack frame goes out of
+  // scope.
+  taskStructMgr.createGEPsToPrivateVars(privateVarAllocations);
+
+  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
+       llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
+                       privateVarAllocations)) {
+    llvm::Expected<llvm::Value *> privateVarOrErr =
         initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
-                       blockArg, llvmPrivateVar, llvmPrivateVars, initBlock);
-    if (err)
+                       blockArg, llvmPrivateVarAlloc, initBlock);
+    if (auto err = privateVarOrErr.takeError())
       return handleError(std::move(err), *taskOp.getOperation());
+
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+    // TODO: this is a bit of a hack for Fortran character boxes
+    if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
+        !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
+      builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
+      // Load it so we have the value pointed to by the GEP
+      llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
+                                               llvmPrivateVarAlloc);
+    }
+    assert(llvmPrivateVarAlloc->getType() ==
+           moduleTranslation.convertType(blockArg.getType()));
+
+    // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback
+    // so that OpenMPIRBuilder doesn't try to pass each GEP address through a
+    // stack allocated structure.
   }
 
   // firstprivate copy region
   if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                  llvmPrivateVars, privateDecls, copyBlock)))
+                                  privateVarAllocations, privateDecls,
+                                  copyBlock)))
     return llvm::failure();
 
   // Set up for call to createTask()
@@ -1826,6 +1941,22 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
                     InsertPointTy codegenIP) -> llvm::Error {
     // translate the body of the task:
     builder.restoreIP(codegenIP);
+
+    // Find and map the addresses of each variable within the task context
+    // structure
+    taskStructMgr.createGEPsToPrivateVars(llvmPrivateVars);
+    for (auto [blockArg, llvmPrivateVar] :
+         llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {
+      // Fix broken pass-by-value case for Fortran character boxes
+      if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
+        llvmPrivateVar = builder.CreateLoad(
+            moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
+      }
+      assert(llvmPrivateVar->getType() ==
+             moduleTranslation.convertType(blockArg.getType()));
+      moduleTranslation.mapValue(blockArg, llvmPrivateVar);
+    }
+
     auto continuationBlockOrError = convertOmpOpRegions(
         taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
     if (failed(handleError(continuationBlockOrError, *taskOp)))
@@ -1837,6 +1968,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
                                   llvmPrivateVars, privateDecls)))
       return llvm::make_error<PreviouslyReportedError>();
 
+    // Free heap allocated task context structure at the end of the task.
+    taskStructMgr.freeStructPtr();
+
     return llvm::Error::success();
   };
 
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 503ddf10c80cdc..9943198d890cec 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -2846,12 +2846,13 @@ llvm.func @task(%arg0 : !llvm.ptr) {
 // CHECK:         %[[VAL_14:.*]] = load ptr, ptr %[[VAL_13]], align 8
 // CHECK:         br label %task.body
 // CHECK:       task.body:                                        ; preds = %task.alloca
+// CHECK:         %[[VAL_15:.*]] = getelementptr { i32 }, ptr %[[VAL_14]], i32 0, i32 0
 // CHECK:         br label %omp.task.region
 // CHECK:       omp.task.region:                                  ; preds = %task.body
-// CHECK:         call void @foo(ptr %[[VAL_14]])
+// CHECK:         call void @foo(ptr %[[VAL_15]])
 // CHECK:         br label %omp.region.cont
 // CHECK:       omp.region.cont:                                  ; preds = %omp.task.region
-// CHECK:         call void @destroy(ptr %[[VAL_14]])
+// CHECK:         call void @destroy(ptr %[[VAL_15]])
 // CHECK:         br label %task.exit.exitStub
 // CHECK:       task.exit.exitStub:                               ; preds = %omp.region.cont
 // CHECK:         ret void
diff --git a/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir b/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir
new file mode 100644
index 00000000000000..39089fb87e0ecf
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir
@@ -0,0 +1,82 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @privatizer : i32
+
+omp.private {type = firstprivate} @firstprivatizer : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+llvm.func @task_privatization_test() {
+  %c0 = llvm.mlir.constant(0: i32) : i32
+  %c1 = llvm.mlir.constant(1: i32) : i32
+  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
+  %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
+  llvm.store %c0, %0 : i32, !llvm.ptr
+  llvm.store %c1, %1 : i32, !llvm.ptr
+
+  omp.task private(@privatizer %0 -> %arg0, @firstprivatizer %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    %2 = llvm.load %arg1 : !llvm.ptr -> i32
+    llvm.store %2, %arg0 : i32, !llvm.ptr
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK:       define void @task_privatization_test()
+// CHECK:         %[[STRUCT_ARG:.*]] = alloca { ptr }, align 8
+// CHECK:         %[[VAL_0:.*]] = alloca i32, align 4
+// CHECK:         %[[VAL_1:.*]] = alloca i32, align 4
+// CHECK:         store i32 0, ptr %[[VAL_0]], align 4
+// CHECK:         store i32 1, ptr %[[VAL_1]], align 4
+// CHECK:         br label %entry
+// CHECK:       entry:
+// CHECK:         br label %omp.private.init
+// CHECK:       omp.private.init:
+// CHECK:         %[[VAL_5:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ([[STRUCT_KMP_PRIVATES_T:.*]], ptr null, i32 1) to i64))
+// CHECK:         %[[VAL_7:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 0
+// CHECK:         %[[VAL_8:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 1
+// CHECK:         br label %omp.private.copy1
+// CHECK:       omp.private.copy1:
+// CHECK:         %[[VAL_10:.*]] = load i32, ptr %[[VAL_1]], align 4
+// CHECK:         store i32 %[[VAL_10]], ptr %[[VAL_8]], align 4
+// CHECK:         br label %omp.private.copy
+// CHECK:       omp.private.copy:
+// CHECK:         br label %omp.task.start
+// CHECK:       omp.task.start:
+// CHECK:         br label %codeRepl
+// CHECK:       codeRepl:
+// CHECK:         %[[GEP_OMP_TASK_CONTEXT_PTR:.*]] = getelementptr { ptr }, ptr %[[STRUCT_ARG]], i32 0, i32 0
+// CHECK:         store ptr %[[VAL_5]], ptr %[[GEP_OMP_TASK_CONTEXT_PTR]], align 8
+// CHECK:         %[[VAL_14:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:         %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_14]], i32 1, i64 40, i64 8, ptr @task_privatization_test..omp_par)
+// CHECK:         %[[ALLOCATED_TASK_STRUCT:.*]] = load ptr, ptr %[[VAL_15]], align 8
+// CHECK:         call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[ALLOCATED_TASK_STRUCT]], ptr align 1 %[[STRUCT_ARG]], i64 8, i1 false)
+// CHECK:         %[[VAL_16:.*]] = call i32 @__kmpc_omp_task(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]])
+// CHECK:         br label %[[VAL_17:.*]]
+// CHECK:       task.exit:
+// CHECK:         ret void
+
+// CHECK-LABEL: define internal void @task_privatization_test..omp_par(
+// CHECK-SAME:      i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR:.*]])
+// CHECK:       task.alloca:
+// CHECK:         %[[OMP_TASK_CONEXT_PTR_PTR_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR]], align 8
+// CHECK:         %[[OMP_TASK_CONTEXT_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR:.*]], i32 0, i32 0
+// CHECK:         %[[OMP_TASK_CONTEXT_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR:.*]], align 8
+// CHECK:         br label %[[VAL_18:.*]]
+// CHECK:       task.body:                                        ; preds = %[[VAL_19:.*]]
+// CHECK:         %[[VAL_20:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
+// CHECK:         %[[VAL_22:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 1
+// CHECK:         br label %[[VAL_23:.*]]
+// CHECK:       omp.task.region:                                  ; preds = %[[VAL_18]]
+// CHECK:         %[[VAL_24:.*]] = load i32, ptr %[[VAL_22]], align 4
+// CHECK:         store i32 %[[VAL_24]], ptr %[[VAL_20]], align 4
+// CHECK:         br label %[[VAL_25:.*]]
+// CHECK:       omp.region.cont:                                  ; preds = %[[VAL_23]]
+// CHECK:         tail call void @free(ptr %[[OMP_TASK_CONTEXT_PTR]])
+// CHECK:         br label %[[VAL_26:.*]]
+// CHECK:       task.exit.exitStub:                               ; preds = %[[VAL_25]]
+// CHECK:         ret void
+

>From f8a0b7181c86046f2d833eff97c4cd88db656285 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 3 Feb 2025 11:43:32 +0000
Subject: [PATCH 2/8] Update after code review on previous patch

---
 mlir/test/Target/LLVMIR/openmp-llvm.mlir | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 9943198d890cec..ab50e49f870421 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -2825,10 +2825,11 @@ llvm.func @task(%arg0 : !llvm.ptr) {
 // CHECK-LABEL: @task
 // CHECK-SAME:      (ptr %[[ARG:.*]])
 // CHECK:         %[[STRUCT_ARG:.*]] = alloca { ptr }, align 8
-// CHECK:         %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4
 //                ...
 // CHECK:         br label %omp.private.init
 // CHECK:       omp.private.init:
+// CHECK:         %[[OMP_TASK_CONTEXT_PTR:.*]] = tail call ptr @malloc(
+// CHECK:         %[[OMP_PRIVATE_ALLOC:.*]] = getelementptr { i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]]
 // CHECK:         br label %omp.private.copy1
 // CHECK:       omp.private.copy1:
 // CHECK:         %[[LOADED:.*]] = load i32, ptr %[[ARG]], align 4

>From d6d50cd96025d1aa74cbdb7f5da604cf104d9afc Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 4 Feb 2025 11:16:13 +0000
Subject: [PATCH 3/8] Allocate simple private vars inside of the task

There's no need to put the variable in the context structure if it can
be initialized without reading from the mold variable.

This leads to much simpler code generation for nested tasks which only
privatize simple scalar variables and do not use firstprivate. This
works around the failure in 0226_0013 (Fujitsu test).
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 87 ++++++++++++++++---
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |  5 +-
 .../LLVMIR/openmp-task-privatization.mlir     | 13 ++-
 3 files changed, 85 insertions(+), 20 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 5c4deab492c839..1d36181a2fa8fa 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1730,24 +1730,42 @@ buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
   }
 }
 
+static bool privatizerReadsSourceVariable(omp::PrivateClauseOp &priv) {
+  if (priv.getDataSharingType() == omp::DataSharingClauseType::FirstPrivate)
+    return true;
+
+  Region &initRegion = priv.getInitRegion();
+  if (initRegion.empty())
+    return false;
+
+  BlockArgument sourceVariable = priv.getInitMoldArg();
+  if (!sourceVariable)
+    return false;
+  return !sourceVariable.use_empty();
+}
+
 namespace {
 /// TaskContextStructManager takes care of creating and freeing a structure
 /// containing information needed by the task body to execute.
 class TaskContextStructManager {
 public:
   TaskContextStructManager(llvm::IRBuilderBase &builder,
-                           LLVM::ModuleTranslation &moduleTranslation)
-      : builder{builder}, moduleTranslation{moduleTranslation} {}
+                           LLVM::ModuleTranslation &moduleTranslation,
+                           MutableArrayRef<omp::PrivateClauseOp> privateDecls)
+      : builder{builder}, moduleTranslation{moduleTranslation},
+        privateDecls{privateDecls} {}
 
   /// Creates a heap allocated struct containing space for each private
   /// variable. Returns nullptr if there are is no struct needed. Invariant:
   /// privateVarTypes, privateDecls, and the elements of the structure should
-  /// all have the same order.
-  void
-  generateTaskContextStruct(MutableArrayRef<omp::PrivateClauseOp> privateDecls);
+  /// all have the same order (although privateDecls which do not read from the
+  /// mold argument are skipped).
+  void generateTaskContextStruct();
 
   /// Create GEPs to access each member of the structure representing a private
-  /// variable, adding them to llvmPrivateVars.
+  /// variable, adding them to llvmPrivateVars. Null values are added where
+  /// private decls were skipped so that the ordering continues to match the
+  /// private decls.
   void createGEPsToPrivateVars(SmallVectorImpl<llvm::Value *> &llvmPrivateVars);
 
   /// De-allocate the task context structure.
@@ -1758,6 +1776,7 @@ class TaskContextStructManager {
 private:
   llvm::IRBuilderBase &builder;
   LLVM::ModuleTranslation &moduleTranslation;
+  MutableArrayRef<omp::PrivateClauseOp> privateDecls;
 
   /// The type of each member of the structure, in order.
   SmallVector<llvm::Type *> privateVarTypes;
@@ -1769,13 +1788,16 @@ class TaskContextStructManager {
 };
 } // namespace
 
-void TaskContextStructManager::generateTaskContextStruct(
-    MutableArrayRef<omp::PrivateClauseOp> privateDecls) {
+void TaskContextStructManager::generateTaskContextStruct() {
   if (privateDecls.empty())
     return;
   privateVarTypes.reserve(privateDecls.size());
 
   for (omp::PrivateClauseOp &privOp : privateDecls) {
+    // Skip private variables which can safely be allocated and initialised
+    // inside of the task
+    if (!privatizerReadsSourceVariable(privOp))
+      continue;
     Type mlirType = privOp.getType();
     privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
   }
@@ -1804,10 +1826,17 @@ void TaskContextStructManager::createGEPsToPrivateVars(
   // Create GEPs for each struct member and initialize llvmPrivateVars to point
   llvmPrivateVars.reserve(privateVarTypes.size());
   llvm::Value *zero = builder.getInt32(0);
-  for (auto [i, eleTy] : llvm::enumerate(privateVarTypes)) {
+  unsigned i = 0;
+  for (auto privDecl : privateDecls) {
+    if (!privatizerReadsSourceVariable(privDecl)) {
+      // Handle this inside of the task so we don't pass unnessecary vars in
+      llvmPrivateVars.push_back(nullptr);
+      continue;
+    }
     llvm::Value *iVal = builder.getInt32(i);
     llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
     llvmPrivateVars.push_back(gep);
+    i += 1;
   }
 }
 
@@ -1835,10 +1864,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   SmallVector<mlir::Value> mlirPrivateVars;
   SmallVector<llvm::Value *> llvmPrivateVars;
   SmallVector<omp::PrivateClauseOp> privateDecls;
-  TaskContextStructManager taskStructMgr{builder, moduleTranslation};
   mlirPrivateVars.reserve(privateBlockArgs.size());
   llvmPrivateVars.reserve(privateBlockArgs.size());
   collectPrivatizationDecls(taskOp, privateDecls);
+  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
+                                         privateDecls};
   for (mlir::Value privateVar : taskOp.getPrivateVars())
     mlirPrivateVars.push_back(privateVar);
 
@@ -1891,7 +1921,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
 
   // Create task variable structure
   llvm::SmallVector<llvm::Value *> privateVarAllocations;
-  taskStructMgr.generateTaskContextStruct(privateDecls);
+  taskStructMgr.generateTaskContextStruct();
   // GEPs so that we can initialize the variables. Don't use these GEPs inside
   // of the body otherwise it will be the GEP not the struct which is fowarded
   // to the outlined function. GEPs forwarded in this way are passed in a
@@ -1903,6 +1933,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
        llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
                        privateVarAllocations)) {
+    if (!llvmPrivateVarAlloc)
+      // to be handled inside the task
+      continue;
+
     llvm::Expected<llvm::Value *> privateVarOrErr =
         initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
                        blockArg, llvmPrivateVarAlloc, initBlock);
@@ -1939,14 +1973,45 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
 
   auto bodyCB = [&](InsertPointTy allocaIP,
                     InsertPointTy codegenIP) -> llvm::Error {
+    // Save the alloca insertion point on ModuleTranslation stack for use in
+    // nested regions.
+    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+        moduleTranslation, allocaIP);
+
     // translate the body of the task:
     builder.restoreIP(codegenIP);
 
+    llvm::BasicBlock *privInitBlock = nullptr;
+    for (auto [blockArg, privDecl, mlirPrivVar] :
+         llvm::zip_equal(privateBlockArgs, privateDecls, mlirPrivateVars)) {
+      if (privatizerReadsSourceVariable(privDecl))
+        // This is handled before the task executes
+        continue;
+
+      auto codegenInsertionPt = builder.saveIP();
+      llvm::Type *llvmAllocType =
+          moduleTranslation.convertType(privDecl.getType());
+      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+      llvm::Value *llvmPrivateVar = builder.CreateAlloca(
+          llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
+
+      llvm::Expected<llvm::Value *> privateVarOrError =
+          initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                         blockArg, llvmPrivateVar, privInitBlock);
+      if (auto err = privateVarOrError.takeError())
+        return err;
+      moduleTranslation.mapValue(blockArg, privateVarOrError.get());
+      builder.restoreIP(codegenInsertionPt);
+    }
+
     // Find and map the addresses of each variable within the task context
     // structure
     taskStructMgr.createGEPsToPrivateVars(llvmPrivateVars);
     for (auto [blockArg, llvmPrivateVar] :
          llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {
+      if (!llvmPrivateVar)
+        // This was handled above
+        continue;
       // Fix broken pass-by-value case for Fortran character boxes
       if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
         llvmPrivateVar = builder.CreateLoad(
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index ab50e49f870421..ba05d97c9a4eda 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -2795,9 +2795,10 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) {
 // CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]])
 // CHECK: define internal void @[[task_outlined_fn]](i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[STRUCT_ARG:.*]])
 // CHECK: %[[LOADED_STRUCT_PTR:.*]] = load ptr, ptr %[[STRUCT_ARG]], align 8
-// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr, ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
+// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
 // CHECK: %[[LOADGEP_STRUCTARG:.*]] = load ptr, ptr %[[GEP_STRUCTARG]], align 8
-// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[LOADGEP_STRUCTARG]])
+// CHEKC: %[[NEW_STRUCTARG:.*]] = alloca { ptr }, align 8
+// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]],
 // CHECK: define internal void @[[parallel_outlined_fn]]
 // -----
 
diff --git a/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir b/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir
index 39089fb87e0ecf..af469460f3cee0 100644
--- a/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir
@@ -36,12 +36,11 @@ llvm.func @task_privatization_test() {
 // CHECK:         br label %omp.private.init
 // CHECK:       omp.private.init:
 // CHECK:         %[[VAL_5:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ([[STRUCT_KMP_PRIVATES_T:.*]], ptr null, i32 1) to i64))
-// CHECK:         %[[VAL_7:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 0
-// CHECK:         %[[VAL_8:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 1
+// CHECK:         %[[VAL_7:.*]] = getelementptr { i32 }, ptr %[[VAL_5]], i32 0, i32 0
 // CHECK:         br label %omp.private.copy1
 // CHECK:       omp.private.copy1:
 // CHECK:         %[[VAL_10:.*]] = load i32, ptr %[[VAL_1]], align 4
-// CHECK:         store i32 %[[VAL_10]], ptr %[[VAL_8]], align 4
+// CHECK:         store i32 %[[VAL_10]], ptr %[[VAL_7]], align 4
 // CHECK:         br label %omp.private.copy
 // CHECK:       omp.private.copy:
 // CHECK:         br label %omp.task.start
@@ -65,14 +64,14 @@ llvm.func @task_privatization_test() {
 // CHECK:         %[[OMP_TASK_CONEXT_PTR_PTR_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR]], align 8
 // CHECK:         %[[OMP_TASK_CONTEXT_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR:.*]], i32 0, i32 0
 // CHECK:         %[[OMP_TASK_CONTEXT_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR:.*]], align 8
+// CHECK:         %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4
 // CHECK:         br label %[[VAL_18:.*]]
 // CHECK:       task.body:                                        ; preds = %[[VAL_19:.*]]
-// CHECK:         %[[VAL_20:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
-// CHECK:         %[[VAL_22:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 1
+// CHECK:         %[[VAL_20:.*]] = getelementptr { i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
 // CHECK:         br label %[[VAL_23:.*]]
 // CHECK:       omp.task.region:                                  ; preds = %[[VAL_18]]
-// CHECK:         %[[VAL_24:.*]] = load i32, ptr %[[VAL_22]], align 4
-// CHECK:         store i32 %[[VAL_24]], ptr %[[VAL_20]], align 4
+// CHECK:         %[[VAL_24:.*]] = load i32, ptr %[[VAL_20]], align 4
+// CHECK:         store i32 %[[VAL_24]], ptr %[[OMP_PRIVATE_ALLOC]], align 4
 // CHECK:         br label %[[VAL_25:.*]]
 // CHECK:       omp.region.cont:                                  ; preds = %[[VAL_23]]
 // CHECK:         tail call void @free(ptr %[[OMP_TASK_CONTEXT_PTR]])

>From 3487dfcf8b07d6a81928d31dd31004c67f6202fc Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 5 Feb 2025 09:39:33 +0000
Subject: [PATCH 4/8] Add methods to operation to test if the mold arg is read

---
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp |  2 +-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 15 ++++++++++++--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 20 +++----------------
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 36a8efd43f8ce8..f96faad81add14 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -587,7 +587,7 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
           sym);
       // TODO: currently there are false positives from dead uses of the mold
       // arg
-      if (!result.getInitMoldArg().getUses().empty())
+      if (result.initReadsFromMold())
         mightHaveReadHostSym = true;
     }
 
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 983627027ac9c3..b825c8ae2aeae7 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -146,13 +146,24 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>
       return region.empty() ? nullptr : region.getArgument(0);
     }
 
+    /// Returns true if the init region might read from the mold argument
+    bool initReadsFromMold() {
+      BlockArgument moldArg = getInitMoldArg();
+      return moldArg ? !moldArg.use_empty() : false;
+    }
+
+    /// Returns true if any region of this privatizer might read from the mold
+    /// argument
+    bool readsFromMold() {
+      return initReadsFromMold() || !getCopyRegion().empty();
+    }
+
     /// needsMap returns true if the value being privatized should additionally
     /// be mapped to the target region using a MapInfoOp. This is most common
     /// when an allocatable is privatized. In such cases, the descriptor is used
     /// in privatization and needs to be mapped on to the device.
     bool needsMap() {
-      BlockArgument moldArg = getInitMoldArg();
-      return moldArg ? !moldArg.use_empty() : false;
+      return initReadsFromMold();
     }
 
     /// Get the type for arguments to nested regions. This should
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 1d36181a2fa8fa..1b108421845b5a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1730,20 +1730,6 @@ buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
   }
 }
 
-static bool privatizerReadsSourceVariable(omp::PrivateClauseOp &priv) {
-  if (priv.getDataSharingType() == omp::DataSharingClauseType::FirstPrivate)
-    return true;
-
-  Region &initRegion = priv.getInitRegion();
-  if (initRegion.empty())
-    return false;
-
-  BlockArgument sourceVariable = priv.getInitMoldArg();
-  if (!sourceVariable)
-    return false;
-  return !sourceVariable.use_empty();
-}
-
 namespace {
 /// TaskContextStructManager takes care of creating and freeing a structure
 /// containing information needed by the task body to execute.
@@ -1796,7 +1782,7 @@ void TaskContextStructManager::generateTaskContextStruct() {
   for (omp::PrivateClauseOp &privOp : privateDecls) {
     // Skip private variables which can safely be allocated and initialised
     // inside of the task
-    if (!privatizerReadsSourceVariable(privOp))
+    if (!privOp.readsFromMold())
       continue;
     Type mlirType = privOp.getType();
     privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
@@ -1828,7 +1814,7 @@ void TaskContextStructManager::createGEPsToPrivateVars(
   llvm::Value *zero = builder.getInt32(0);
   unsigned i = 0;
   for (auto privDecl : privateDecls) {
-    if (!privatizerReadsSourceVariable(privDecl)) {
+    if (!privDecl.readsFromMold()) {
       // Handle this inside of the task so we don't pass unnessecary vars in
       llvmPrivateVars.push_back(nullptr);
       continue;
@@ -1984,7 +1970,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
     llvm::BasicBlock *privInitBlock = nullptr;
     for (auto [blockArg, privDecl, mlirPrivVar] :
          llvm::zip_equal(privateBlockArgs, privateDecls, mlirPrivateVars)) {
-      if (privatizerReadsSourceVariable(privDecl))
+      if (privDecl.readsFromMold())
         // This is handled before the task executes
         continue;
 

>From ae86447fa525ecfbe9bd0996974a2b3b0ce74481 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 5 Feb 2025 09:39:59 +0000
Subject: [PATCH 5/8] Fix comment

---
 .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp    | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 1b108421845b5a..7b619865312b5c 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1742,10 +1742,9 @@ class TaskContextStructManager {
         privateDecls{privateDecls} {}
 
   /// Creates a heap allocated struct containing space for each private
-  /// variable. Returns nullptr if there are is no struct needed. Invariant:
-  /// privateVarTypes, privateDecls, and the elements of the structure should
-  /// all have the same order (although privateDecls which do not read from the
-  /// mold argument are skipped).
+  /// variable. Invariant: privateVarTypes, privateDecls, and the elements of
+  /// the structure should all have the same order (although privateDecls which
+  /// do not read from the mold argument are skipped).
   void generateTaskContextStruct();
 
   /// Create GEPs to access each member of the structure representing a private

>From 5c60c8fc7536f8f35134b31503f85fd7089a8afc Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 5 Feb 2025 09:50:17 +0000
Subject: [PATCH 6/8] Remove outdated comment

---
 .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp   | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7b619865312b5c..0aa286655d864f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1901,7 +1901,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
       moduleTranslation, allocaIP);
 
   // Allocate and initialize private variables
-  // TODO: package private variables up in a structure
   builder.SetInsertPoint(initBlock->getTerminator());
 
   // Create task variable structure

>From 9baebf1aaf31c444183bd3188237683b2cd90c04 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 5 Feb 2025 10:00:29 +0000
Subject: [PATCH 7/8] Make logic clearer

---
 .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp       | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 0aa286655d864f..18c8e950f76ef8 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1917,9 +1917,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
        llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
                        privateVarAllocations)) {
-    if (!llvmPrivateVarAlloc)
+    if (!privDecl.readsFromMold())
       // to be handled inside the task
       continue;
+    assert(llvmPrivateVarAlloc &&
+           "reads from mold so shouldn't have been skipped");
 
     llvm::Expected<llvm::Value *> privateVarOrErr =
         initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,

>From dd07fa7ae85c3837862b7771d2ad5ed449e87f16 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 5 Feb 2025 15:48:58 +0000
Subject: [PATCH 8/8] Move llvm private vars into task context struct manager

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 29 ++++++++++++-------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 18c8e950f76ef8..1b049671b7a680 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1428,7 +1428,7 @@ static LogicalResult
 initFirstPrivateVars(llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation,
                      SmallVectorImpl<mlir::Value> &mlirPrivateVars,
-                     SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
+                     ArrayRef<llvm::Value *> llvmPrivateVars,
                      SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
                      llvm::BasicBlock *afterAllocas) {
   llvm::IRBuilderBase::InsertPointGuard guard(builder);
@@ -1751,11 +1751,15 @@ class TaskContextStructManager {
   /// variable, adding them to llvmPrivateVars. Null values are added where
   /// private decls were skipped so that the ordering continues to match the
   /// private decls.
-  void createGEPsToPrivateVars(SmallVectorImpl<llvm::Value *> &llvmPrivateVars);
+  void createGEPsToPrivateVars();
 
   /// De-allocate the task context structure.
   void freeStructPtr();
 
+  MutableArrayRef<llvm::Value *> getLLVMPrivateVars() {
+    return llvmPrivateVars;
+  }
+
   llvm::Value *getStructPtr() { return structPtr; }
 
 private:
@@ -1766,6 +1770,10 @@ class TaskContextStructManager {
   /// The type of each member of the structure, in order.
   SmallVector<llvm::Type *> privateVarTypes;
 
+  /// LLVM values for each private variable, or null if that private variable is
+  /// not included in the task context structure
+  SmallVector<llvm::Value *> llvmPrivateVars;
+
   /// A pointer to the structure containing context for this task.
   llvm::Value *structPtr = nullptr;
   /// The type of the structure
@@ -1801,14 +1809,14 @@ void TaskContextStructManager::generateTaskContextStruct() {
                                    "omp.task.context_ptr");
 }
 
-void TaskContextStructManager::createGEPsToPrivateVars(
-    SmallVectorImpl<llvm::Value *> &llvmPrivateVars) {
+void TaskContextStructManager::createGEPsToPrivateVars() {
   if (!structPtr) {
     assert(privateVarTypes.empty());
     return;
   }
 
   // Create GEPs for each struct member and initialize llvmPrivateVars to point
+  llvmPrivateVars.clear();
   llvmPrivateVars.reserve(privateVarTypes.size());
   llvm::Value *zero = builder.getInt32(0);
   unsigned i = 0;
@@ -1904,7 +1912,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   builder.SetInsertPoint(initBlock->getTerminator());
 
   // Create task variable structure
-  llvm::SmallVector<llvm::Value *> privateVarAllocations;
   taskStructMgr.generateTaskContextStruct();
   // GEPs so that we can initialize the variables. Don't use these GEPs inside
   // of the body otherwise it will be the GEP not the struct which is fowarded
@@ -1912,11 +1919,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
   // which may not be executed until after the current stack frame goes out of
   // scope.
-  taskStructMgr.createGEPsToPrivateVars(privateVarAllocations);
+  taskStructMgr.createGEPsToPrivateVars();
 
   for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
        llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
-                       privateVarAllocations)) {
+                       taskStructMgr.getLLVMPrivateVars())) {
     if (!privDecl.readsFromMold())
       // to be handled inside the task
       continue;
@@ -1950,8 +1957,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
 
   // firstprivate copy region
   if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                  privateVarAllocations, privateDecls,
-                                  copyBlock)))
+                                  taskStructMgr.getLLVMPrivateVars(),
+                                  privateDecls, copyBlock)))
     return llvm::failure();
 
   // Set up for call to createTask()
@@ -1992,7 +1999,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
 
     // Find and map the addresses of each variable within the task context
     // structure
-    taskStructMgr.createGEPsToPrivateVars(llvmPrivateVars);
+    taskStructMgr.createGEPsToPrivateVars();
+    llvm::copy(taskStructMgr.getLLVMPrivateVars(),
+               std::back_inserter(llvmPrivateVars));
     for (auto [blockArg, llvmPrivateVar] :
          llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {
       if (!llvmPrivateVar)



More information about the llvm-branch-commits mailing list