[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP] Pack task private variables into a heap-allocated context struct (PR #125307)

Tom Eccles via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Feb 24 05:21:44 PST 2025


https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/125307

>From df47461dcf1f9244da472efdcc57d266ecd42b34 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Fri, 24 Jan 2025 17:32:41 +0000
Subject: [PATCH 01/11] [mlir][OpenMP] Pack task private variables into a
 heap-allocated context struct

See RFC:
https://discourse.llvm.org/t/rfc-openmp-supporting-delayed-task-execution-with-firstprivate-variables/83084

The aim here is to ensure that tasks which are not executed for a while
after they are created do not try to reference any data which are now
out of scope. This is done by packing the data referred to by the task
into a heap allocated structure (freed at the end of the task).

I decided to create the task context structure in
OpenMPToLLVMIRTranslation instead of adapting how it is done
CodeExtractor (via OpenMPIRBuilder] because CodeExtractor is (at least
in theory) generic code which could have other unrelated uses.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 209 ++++++++++++++----
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |  10 +-
 .../LLVMIR/openmp-task-privatization.mlir     |  80 +++++++
 3 files changed, 258 insertions(+), 41 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-task-privatization.mlir

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index f3e57e8fcaa2b..9b0f9486529d2 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
 #include "mlir/Analysis/TopologicalSortUtils.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
 #include "mlir/IR/IRMapping.h"
@@ -24,10 +25,12 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/ReplaceConstant.h"
 #include "llvm/Support/FileSystem.h"
@@ -1336,15 +1339,16 @@ findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
 
 /// Initialize a single (first)private variable. You probably want to use
 /// allocateAndInitPrivateVars instead of this.
-static llvm::Error initPrivateVar(
+/// This returns the private variable which has been initialized. This
+/// variable should be mapped before constructing the body of the Op.
+static llvm::Expected<llvm::Value *> initPrivateVar(
     llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
     omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
-    llvm::Value **llvmPrivateVarIt, llvm::BasicBlock *privInitBlock,
+    llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
     llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
   Region &initRegion = privDecl.getInitRegion();
   if (initRegion.empty()) {
-    moduleTranslation.mapValue(blockArg, *llvmPrivateVarIt);
-    return llvm::Error::success();
+    return llvmPrivateVar;
   }
 
   // map initialization region block arguments
@@ -1352,7 +1356,7 @@ static llvm::Error initPrivateVar(
       mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
   assert(nonPrivateVar);
   moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar);
-  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), *llvmPrivateVarIt);
+  moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar);
 
   // in-place convert the private initialization region
   SmallVector<llvm::Value *, 1> phis;
@@ -1363,17 +1367,15 @@ static llvm::Error initPrivateVar(
 
   assert(phis.size() == 1 && "expected one allocation to be yielded");
 
-  // prefer the value yielded from the init region to the allocated private
-  // variable in case the region is operating on arguments by-value (e.g.
-  // Fortran character boxes).
-  moduleTranslation.mapValue(blockArg, phis[0]);
-  *llvmPrivateVarIt = phis[0];
-
   // clear init region block argument mapping in case it needs to be
   // re-created with a different source for another use of the same
   // reduction decl
   moduleTranslation.forgetMapping(initRegion);
-  return llvm::Error::success();
+
+  // Prefer the value yielded from the init region to the allocated private
+  // variable in case the region is operating on arguments by-value (e.g.
+  // Fortran character boxes).
+  return phis[0];
 }
 
 static llvm::Error
@@ -1390,16 +1392,19 @@ initPrivateVars(llvm::IRBuilderBase &builder,
   llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init");
   setInsertPointForPossiblyEmptyBlock(builder, privInitBlock);
 
-  for (auto [idx, zip] : llvm::enumerate(
-           llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs))) {
-    auto [privDecl, mlirPrivVar, blockArg] = zip;
-    llvm::Error err = initPrivateVar(
+  for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal(
+           privateDecls, mlirPrivateVars, privateBlockArgs, llvmPrivateVars))) {
+    auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip;
+    llvm::Expected<llvm::Value *> privVarOrErr = initPrivateVar(
         builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
-        llvmPrivateVars.begin() + idx, privInitBlock, mappedPrivateVars);
+        llvmPrivateVar, privInitBlock, mappedPrivateVars);
 
-    if (err)
+    if (auto err = privVarOrErr.takeError())
       return err;
 
+    llvmPrivateVar = privVarOrErr.get();
+    moduleTranslation.mapValue(blockArg, llvmPrivateVar);
+
     setInsertPointForPossiblyEmptyBlock(builder);
   }
 
@@ -1750,6 +1755,97 @@ buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
   }
 }
 
+namespace {
+/// TaskContextStructManager takes care of creating and freeing a structure
+/// containing information needed by the task body to execute.
+class TaskContextStructManager {
+public:
+  TaskContextStructManager(llvm::IRBuilderBase &builder,
+                           LLVM::ModuleTranslation &moduleTranslation)
+      : builder{builder}, moduleTranslation{moduleTranslation} {}
+
+  /// Creates a heap allocated struct containing space for each private
+  /// variable. Returns nullptr if there are is no struct needed. Invariant:
+  /// privateVarTypes, privateDecls, and the elements of the structure should
+  /// all have the same order.
+  void
+  generateTaskContextStruct(MutableArrayRef<omp::PrivateClauseOp> privateDecls);
+
+  /// Create GEPs to access each member of the structure representing a private
+  /// variable, adding them to llvmPrivateVars.
+  void createGEPsToPrivateVars(SmallVectorImpl<llvm::Value *> &llvmPrivateVars);
+
+  /// De-allocate the task context structure.
+  void freeStructPtr();
+
+  llvm::Value *getStructPtr() { return structPtr; }
+
+private:
+  llvm::IRBuilderBase &builder;
+  LLVM::ModuleTranslation &moduleTranslation;
+
+  /// The type of each member of the structure, in order.
+  SmallVector<llvm::Type *> privateVarTypes;
+
+  /// A pointer to the structure containing context for this task.
+  llvm::Value *structPtr = nullptr;
+  /// The type of the structure
+  llvm::Type *structTy = nullptr;
+};
+} // namespace
+
+void TaskContextStructManager::generateTaskContextStruct(
+    MutableArrayRef<omp::PrivateClauseOp> privateDecls) {
+  if (privateDecls.empty())
+    return;
+  privateVarTypes.reserve(privateDecls.size());
+
+  for (omp::PrivateClauseOp &privOp : privateDecls) {
+    Type mlirType = privOp.getType();
+    privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
+  }
+
+  structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
+                                   privateVarTypes);
+
+  llvm::DataLayout dataLayout =
+      builder.GetInsertBlock()->getModule()->getDataLayout();
+  llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout);
+  llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy);
+
+  // Heap allocate the structure
+  structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize,
+                                   /*ArraySize=*/nullptr, /*MallocF=*/nullptr,
+                                   "omp.task.context_ptr");
+}
+
+void TaskContextStructManager::createGEPsToPrivateVars(
+    SmallVectorImpl<llvm::Value *> &llvmPrivateVars) {
+  if (!structPtr) {
+    assert(privateVarTypes.empty());
+    return;
+  }
+
+  // Create GEPs for each struct member and initialize llvmPrivateVars to point
+  llvmPrivateVars.reserve(privateVarTypes.size());
+  llvm::Value *zero = builder.getInt32(0);
+  for (auto [i, eleTy] : llvm::enumerate(privateVarTypes)) {
+    llvm::Value *iVal = builder.getInt32(i);
+    llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
+    llvmPrivateVars.push_back(gep);
+  }
+}
+
+void TaskContextStructManager::freeStructPtr() {
+  if (!structPtr)
+    return;
+
+  llvm::IRBuilderBase::InsertPointGuard guard{builder};
+  // Ensure we don't put the call to free() after the terminator
+  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+  builder.CreateFree(structPtr);
+}
+
 /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@@ -1764,6 +1860,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   SmallVector<mlir::Value> mlirPrivateVars;
   SmallVector<llvm::Value *> llvmPrivateVars;
   SmallVector<omp::PrivateClauseOp> privateDecls;
+  TaskContextStructManager taskStructMgr{builder, moduleTranslation};
   mlirPrivateVars.reserve(privateBlockArgs.size());
   llvmPrivateVars.reserve(privateBlockArgs.size());
   collectPrivatizationDecls(taskOp, privateDecls);
@@ -1814,30 +1911,51 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
       moduleTranslation, allocaIP);
 
   // Allocate and initialize private variables
-  // TODO: package private variables up in a structure
-  for (auto [privDecl, mlirPrivVar, blockArg] :
-       llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
-    llvm::Type *llvmAllocType =
-        moduleTranslation.convertType(privDecl.getType());
-
-    // Allocations:
-    builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
-    llvm::Value *llvmPrivateVar = builder.CreateAlloca(
-        llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
-
-    builder.SetInsertPoint(initBlock->getTerminator());
-    auto err = initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
-                              blockArg, &llvmPrivateVar, initBlock);
-    if (err)
+  builder.SetInsertPoint(initBlock->getTerminator());
+
+  // Create task variable structure
+  llvm::SmallVector<llvm::Value *> privateVarAllocations;
+  taskStructMgr.generateTaskContextStruct(privateDecls);
+  // GEPs so that we can initialize the variables. Don't use these GEPs inside
+  // of the body otherwise it will be the GEP not the struct which is fowarded
+  // to the outlined function. GEPs forwarded in this way are passed in a
+  // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
+  // which may not be executed until after the current stack frame goes out of
+  // scope.
+  taskStructMgr.createGEPsToPrivateVars(privateVarAllocations);
+
+  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
+       llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
+                       privateVarAllocations)) {
+    llvm::Expected<llvm::Value *> privateVarOrErr =
+        initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                       blockArg, llvmPrivateVarAlloc, initBlock);
+    if (auto err = privateVarOrErr.takeError())
       return handleError(std::move(err), *taskOp.getOperation());
 
-    llvmPrivateVars.push_back(llvmPrivateVar);
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+    // TODO: this is a bit of a hack for Fortran character boxes
+    if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
+        !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
+      builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
+      // Load it so we have the value pointed to by the GEP
+      llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
+                                               llvmPrivateVarAlloc);
+    }
+    assert(llvmPrivateVarAlloc->getType() ==
+           moduleTranslation.convertType(blockArg.getType()));
+
+    // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback
+    // so that OpenMPIRBuilder doesn't try to pass each GEP address through a
+    // stack allocated structure.
   }
 
   // firstprivate copy region
-  builder.SetInsertPoint(copyBlock->getTerminator());
+  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
   if (failed(copyFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                  llvmPrivateVars, privateDecls)))
+                                  privateVarAllocations, privateDecls)))
     return llvm::failure();
 
   // Set up for call to createTask()
@@ -1846,7 +1964,21 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   auto bodyCB = [&](InsertPointTy allocaIP,
                     InsertPointTy codegenIP) -> llvm::Error {
     builder.restoreIP(codegenIP);
-    // translate the body of the task:
+    // Find and map the addresses of each variable within the task context
+    // structure
+    taskStructMgr.createGEPsToPrivateVars(llvmPrivateVars);
+    for (auto [blockArg, llvmPrivateVar] :
+         llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {
+      // Fix broken pass-by-value case for Fortran character boxes
+      if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
+        llvmPrivateVar = builder.CreateLoad(
+            moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
+      }
+      assert(llvmPrivateVar->getType() ==
+             moduleTranslation.convertType(blockArg.getType()));
+      moduleTranslation.mapValue(blockArg, llvmPrivateVar);
+    }
+
     auto continuationBlockOrError = convertOmpOpRegions(
         taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
     if (failed(handleError(continuationBlockOrError, *taskOp)))
@@ -1858,6 +1990,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
                                   llvmPrivateVars, privateDecls)))
       return llvm::make_error<PreviouslyReportedError>();
 
+    // Free heap allocated task context structure at the end of the task.
+    taskStructMgr.freeStructPtr();
+
     return llvm::Error::success();
   };
 
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 503ddf10c80cd..87c7f5473045b 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -2825,14 +2825,15 @@ llvm.func @task(%arg0 : !llvm.ptr) {
 // CHECK-LABEL: @task
 // CHECK-SAME:      (ptr %[[ARG:.*]])
 // CHECK:         %[[STRUCT_ARG:.*]] = alloca { ptr }, align 8
-// CHECK:         %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4
 //                ...
 // CHECK:         br label %omp.private.init
 // CHECK:       omp.private.init:
+// CHECK:         %[[TASK_STRUCT:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:         %[[GEP:.*]] = getelementptr { i32 }, ptr %[[TASK_STRUCT:.*]], i32 0, i32 0
 // CHECK:         br label %omp.private.copy1
 // CHECK:       omp.private.copy1:
 // CHECK:         %[[LOADED:.*]] = load i32, ptr %[[ARG]], align 4
-// CHECK:         store i32 %[[LOADED]], ptr %[[OMP_PRIVATE_ALLOC]], align 4
+// CHECK:         store i32 %[[LOADED]], ptr %[[GEP]], align 4
 //                ...
 // CHECK:         br label %omp.task.start
 // CHECK:       omp.task.start:
@@ -2846,12 +2847,13 @@ llvm.func @task(%arg0 : !llvm.ptr) {
 // CHECK:         %[[VAL_14:.*]] = load ptr, ptr %[[VAL_13]], align 8
 // CHECK:         br label %task.body
 // CHECK:       task.body:                                        ; preds = %task.alloca
+// CHECK:         %[[VAL_15:.*]] = getelementptr { i32 }, ptr %[[VAL_14]], i32 0, i32 0
 // CHECK:         br label %omp.task.region
 // CHECK:       omp.task.region:                                  ; preds = %task.body
-// CHECK:         call void @foo(ptr %[[VAL_14]])
+// CHECK:         call void @foo(ptr %[[VAL_15]])
 // CHECK:         br label %omp.region.cont
 // CHECK:       omp.region.cont:                                  ; preds = %omp.task.region
-// CHECK:         call void @destroy(ptr %[[VAL_14]])
+// CHECK:         call void @destroy(ptr %[[VAL_15]])
 // CHECK:         br label %task.exit.exitStub
 // CHECK:       task.exit.exitStub:                               ; preds = %omp.region.cont
 // CHECK:         ret void
diff --git a/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir b/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir
new file mode 100644
index 0000000000000..78cbda5de67d7
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir
@@ -0,0 +1,80 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @privatizer : i32
+
+omp.private {type = firstprivate} @firstprivatizer : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+llvm.func @task_privatization_test() {
+  %c0 = llvm.mlir.constant(0: i32) : i32
+  %c1 = llvm.mlir.constant(1: i32) : i32
+  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
+  %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
+  llvm.store %c0, %0 : i32, !llvm.ptr
+  llvm.store %c1, %1 : i32, !llvm.ptr
+
+  omp.task private(@privatizer %0 -> %arg0, @firstprivatizer %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    %2 = llvm.load %arg1 : !llvm.ptr -> i32
+    llvm.store %2, %arg0 : i32, !llvm.ptr
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK:       define void @task_privatization_test()
+// CHECK:         %[[STRUCT_ARG:.*]] = alloca { ptr }, align 8
+// CHECK:         %[[VAL_0:.*]] = alloca i32, align 4
+// CHECK:         %[[VAL_1:.*]] = alloca i32, align 4
+// CHECK:         store i32 0, ptr %[[VAL_0]], align 4
+// CHECK:         store i32 1, ptr %[[VAL_1]], align 4
+// CHECK:         br label %entry
+// CHECK:       entry:
+// CHECK:         br label %omp.private.init
+// CHECK:       omp.private.init:
+// CHECK:         %[[VAL_5:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ([[STRUCT_KMP_PRIVATES_T:.*]], ptr null, i32 1) to i64))
+// CHECK:         %[[VAL_7:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 0
+// CHECK:         %[[VAL_8:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 1
+// CHECK:         br label %omp.private.copy
+// CHECK:       omp.private.copy:
+// CHECK:         %[[VAL_10:.*]] = load i32, ptr %[[VAL_1]], align 4
+// CHECK:         store i32 %[[VAL_10]], ptr %[[VAL_8]], align 4
+// CHECK:         br label %omp.task.start
+// CHECK:       omp.task.start:
+// CHECK:         br label %codeRepl
+// CHECK:       codeRepl:
+// CHECK:         %[[GEP_OMP_TASK_CONTEXT_PTR:.*]] = getelementptr { ptr }, ptr %[[STRUCT_ARG]], i32 0, i32 0
+// CHECK:         store ptr %[[VAL_5]], ptr %[[GEP_OMP_TASK_CONTEXT_PTR]], align 8
+// CHECK:         %[[VAL_14:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:         %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_14]], i32 1, i64 40, i64 8, ptr @task_privatization_test..omp_par)
+// CHECK:         %[[ALLOCATED_TASK_STRUCT:.*]] = load ptr, ptr %[[VAL_15]], align 8
+// CHECK:         call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[ALLOCATED_TASK_STRUCT]], ptr align 1 %[[STRUCT_ARG]], i64 8, i1 false)
+// CHECK:         %[[VAL_16:.*]] = call i32 @__kmpc_omp_task(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]])
+// CHECK:         br label %[[VAL_17:.*]]
+// CHECK:       task.exit:
+// CHECK:         ret void
+
+// CHECK-LABEL: define internal void @task_privatization_test..omp_par(
+// CHECK-SAME:      i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR:.*]])
+// CHECK:       task.alloca:
+// CHECK:         %[[OMP_TASK_CONEXT_PTR_PTR_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR]], align 8
+// CHECK:         %[[OMP_TASK_CONTEXT_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR:.*]], i32 0, i32 0
+// CHECK:         %[[OMP_TASK_CONTEXT_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR:.*]], align 8
+// CHECK:         br label %[[VAL_18:.*]]
+// CHECK:       task.body:                                        ; preds = %[[VAL_19:.*]]
+// CHECK:         %[[VAL_20:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
+// CHECK:         %[[VAL_22:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 1
+// CHECK:         br label %[[VAL_23:.*]]
+// CHECK:       omp.task.region:                                  ; preds = %[[VAL_18]]
+// CHECK:         %[[VAL_24:.*]] = load i32, ptr %[[VAL_22]], align 4
+// CHECK:         store i32 %[[VAL_24]], ptr %[[VAL_20]], align 4
+// CHECK:         br label %[[VAL_25:.*]]
+// CHECK:       omp.region.cont:                                  ; preds = %[[VAL_23]]
+// CHECK:         tail call void @free(ptr %[[OMP_TASK_CONTEXT_PTR]])
+// CHECK:         br label %[[VAL_26:.*]]
+// CHECK:       task.exit.exitStub:                               ; preds = %[[VAL_25]]
+// CHECK:         ret void
+

>From 9a4fe6aeddb510c582322fbad9a1eda10fd034dc Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 4 Feb 2025 11:16:13 +0000
Subject: [PATCH 02/11] Allocate simple private vars inside of the task

There's no need to put the variable in the context structure if it can
be initialized without reading from the mold variable.

This leads to much simpler code generation for nested tasks which only
privatize simple scalar variables and do not use firstprivate. This
works around the failure in 0226_0013 (Fujitsu test).
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 89 ++++++++++++++++---
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |  5 +-
 .../LLVMIR/openmp-task-privatization.mlir     | 13 ++-
 3 files changed, 87 insertions(+), 20 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9b0f9486529d2..bbe4d58270305 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1755,24 +1755,42 @@ buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
   }
 }
 
+static bool privatizerReadsSourceVariable(omp::PrivateClauseOp &priv) {
+  if (priv.getDataSharingType() == omp::DataSharingClauseType::FirstPrivate)
+    return true;
+
+  Region &initRegion = priv.getInitRegion();
+  if (initRegion.empty())
+    return false;
+
+  BlockArgument sourceVariable = priv.getInitMoldArg();
+  if (!sourceVariable)
+    return false;
+  return !sourceVariable.use_empty();
+}
+
 namespace {
 /// TaskContextStructManager takes care of creating and freeing a structure
 /// containing information needed by the task body to execute.
 class TaskContextStructManager {
 public:
   TaskContextStructManager(llvm::IRBuilderBase &builder,
-                           LLVM::ModuleTranslation &moduleTranslation)
-      : builder{builder}, moduleTranslation{moduleTranslation} {}
+                           LLVM::ModuleTranslation &moduleTranslation,
+                           MutableArrayRef<omp::PrivateClauseOp> privateDecls)
+      : builder{builder}, moduleTranslation{moduleTranslation},
+        privateDecls{privateDecls} {}
 
   /// Creates a heap allocated struct containing space for each private
   /// variable. Returns nullptr if there are is no struct needed. Invariant:
   /// privateVarTypes, privateDecls, and the elements of the structure should
-  /// all have the same order.
-  void
-  generateTaskContextStruct(MutableArrayRef<omp::PrivateClauseOp> privateDecls);
+  /// all have the same order (although privateDecls which do not read from the
+  /// mold argument are skipped).
+  void generateTaskContextStruct();
 
   /// Create GEPs to access each member of the structure representing a private
-  /// variable, adding them to llvmPrivateVars.
+  /// variable, adding them to llvmPrivateVars. Null values are added where
+  /// private decls were skipped so that the ordering continues to match the
+  /// private decls.
   void createGEPsToPrivateVars(SmallVectorImpl<llvm::Value *> &llvmPrivateVars);
 
   /// De-allocate the task context structure.
@@ -1783,6 +1801,7 @@ class TaskContextStructManager {
 private:
   llvm::IRBuilderBase &builder;
   LLVM::ModuleTranslation &moduleTranslation;
+  MutableArrayRef<omp::PrivateClauseOp> privateDecls;
 
   /// The type of each member of the structure, in order.
   SmallVector<llvm::Type *> privateVarTypes;
@@ -1794,13 +1813,16 @@ class TaskContextStructManager {
 };
 } // namespace
 
-void TaskContextStructManager::generateTaskContextStruct(
-    MutableArrayRef<omp::PrivateClauseOp> privateDecls) {
+void TaskContextStructManager::generateTaskContextStruct() {
   if (privateDecls.empty())
     return;
   privateVarTypes.reserve(privateDecls.size());
 
   for (omp::PrivateClauseOp &privOp : privateDecls) {
+    // Skip private variables which can safely be allocated and initialised
+    // inside of the task
+    if (!privatizerReadsSourceVariable(privOp))
+      continue;
     Type mlirType = privOp.getType();
     privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
   }
@@ -1829,10 +1851,17 @@ void TaskContextStructManager::createGEPsToPrivateVars(
   // Create GEPs for each struct member and initialize llvmPrivateVars to point
   llvmPrivateVars.reserve(privateVarTypes.size());
   llvm::Value *zero = builder.getInt32(0);
-  for (auto [i, eleTy] : llvm::enumerate(privateVarTypes)) {
+  unsigned i = 0;
+  for (auto privDecl : privateDecls) {
+    if (!privatizerReadsSourceVariable(privDecl)) {
+      // Handle this inside of the task so we don't pass unnessecary vars in
+      llvmPrivateVars.push_back(nullptr);
+      continue;
+    }
     llvm::Value *iVal = builder.getInt32(i);
     llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
     llvmPrivateVars.push_back(gep);
+    i += 1;
   }
 }
 
@@ -1860,10 +1889,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   SmallVector<mlir::Value> mlirPrivateVars;
   SmallVector<llvm::Value *> llvmPrivateVars;
   SmallVector<omp::PrivateClauseOp> privateDecls;
-  TaskContextStructManager taskStructMgr{builder, moduleTranslation};
   mlirPrivateVars.reserve(privateBlockArgs.size());
   llvmPrivateVars.reserve(privateBlockArgs.size());
   collectPrivatizationDecls(taskOp, privateDecls);
+  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
+                                         privateDecls};
   for (mlir::Value privateVar : taskOp.getPrivateVars())
     mlirPrivateVars.push_back(privateVar);
 
@@ -1915,7 +1945,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
 
   // Create task variable structure
   llvm::SmallVector<llvm::Value *> privateVarAllocations;
-  taskStructMgr.generateTaskContextStruct(privateDecls);
+  taskStructMgr.generateTaskContextStruct();
   // GEPs so that we can initialize the variables. Don't use these GEPs inside
   // of the body otherwise it will be the GEP not the struct which is fowarded
   // to the outlined function. GEPs forwarded in this way are passed in a
@@ -1927,6 +1957,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
        llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
                        privateVarAllocations)) {
+    if (!llvmPrivateVarAlloc)
+      // to be handled inside the task
+      continue;
+
     llvm::Expected<llvm::Value *> privateVarOrErr =
         initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
                        blockArg, llvmPrivateVarAlloc, initBlock);
@@ -1963,12 +1997,45 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
 
   auto bodyCB = [&](InsertPointTy allocaIP,
                     InsertPointTy codegenIP) -> llvm::Error {
+    // Save the alloca insertion point on ModuleTranslation stack for use in
+    // nested regions.
+    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+        moduleTranslation, allocaIP);
+
+    // translate the body of the task:
     builder.restoreIP(codegenIP);
+
+    llvm::BasicBlock *privInitBlock = nullptr;
+    for (auto [blockArg, privDecl, mlirPrivVar] :
+         llvm::zip_equal(privateBlockArgs, privateDecls, mlirPrivateVars)) {
+      if (privatizerReadsSourceVariable(privDecl))
+        // This is handled before the task executes
+        continue;
+
+      auto codegenInsertionPt = builder.saveIP();
+      llvm::Type *llvmAllocType =
+          moduleTranslation.convertType(privDecl.getType());
+      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+      llvm::Value *llvmPrivateVar = builder.CreateAlloca(
+          llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
+
+      llvm::Expected<llvm::Value *> privateVarOrError =
+          initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                         blockArg, llvmPrivateVar, privInitBlock);
+      if (auto err = privateVarOrError.takeError())
+        return err;
+      moduleTranslation.mapValue(blockArg, privateVarOrError.get());
+      builder.restoreIP(codegenInsertionPt);
+    }
+
     // Find and map the addresses of each variable within the task context
     // structure
     taskStructMgr.createGEPsToPrivateVars(llvmPrivateVars);
     for (auto [blockArg, llvmPrivateVar] :
          llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {
+      if (!llvmPrivateVar)
+        // This was handled above
+        continue;
       // Fix broken pass-by-value case for Fortran character boxes
       if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
         llvmPrivateVar = builder.CreateLoad(
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 87c7f5473045b..2024325984916 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -2795,9 +2795,10 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) {
 // CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]])
 // CHECK: define internal void @[[task_outlined_fn]](i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[STRUCT_ARG:.*]])
 // CHECK: %[[LOADED_STRUCT_PTR:.*]] = load ptr, ptr %[[STRUCT_ARG]], align 8
-// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr, ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
+// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
 // CHECK: %[[LOADGEP_STRUCTARG:.*]] = load ptr, ptr %[[GEP_STRUCTARG]], align 8
-// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[LOADGEP_STRUCTARG]])
+// CHEKC: %[[NEW_STRUCTARG:.*]] = alloca { ptr }, align 8
+// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]],
 // CHECK: define internal void @[[parallel_outlined_fn]]
 // -----
 
diff --git a/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir b/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir
index 78cbda5de67d7..f7a8b970f5f80 100644
--- a/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir
@@ -36,12 +36,11 @@ llvm.func @task_privatization_test() {
 // CHECK:         br label %omp.private.init
 // CHECK:       omp.private.init:
 // CHECK:         %[[VAL_5:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ([[STRUCT_KMP_PRIVATES_T:.*]], ptr null, i32 1) to i64))
-// CHECK:         %[[VAL_7:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 0
-// CHECK:         %[[VAL_8:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 1
+// CHECK:         %[[VAL_7:.*]] = getelementptr { i32 }, ptr %[[VAL_5]], i32 0, i32 0
 // CHECK:         br label %omp.private.copy
 // CHECK:       omp.private.copy:
 // CHECK:         %[[VAL_10:.*]] = load i32, ptr %[[VAL_1]], align 4
-// CHECK:         store i32 %[[VAL_10]], ptr %[[VAL_8]], align 4
+// CHECK:         store i32 %[[VAL_10]], ptr %[[VAL_7]], align 4
 // CHECK:         br label %omp.task.start
 // CHECK:       omp.task.start:
 // CHECK:         br label %codeRepl
@@ -63,14 +62,14 @@ llvm.func @task_privatization_test() {
 // CHECK:         %[[OMP_TASK_CONEXT_PTR_PTR_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR]], align 8
 // CHECK:         %[[OMP_TASK_CONTEXT_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR:.*]], i32 0, i32 0
 // CHECK:         %[[OMP_TASK_CONTEXT_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR:.*]], align 8
+// CHECK:         %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4
 // CHECK:         br label %[[VAL_18:.*]]
 // CHECK:       task.body:                                        ; preds = %[[VAL_19:.*]]
-// CHECK:         %[[VAL_20:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
-// CHECK:         %[[VAL_22:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 1
+// CHECK:         %[[VAL_20:.*]] = getelementptr { i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
 // CHECK:         br label %[[VAL_23:.*]]
 // CHECK:       omp.task.region:                                  ; preds = %[[VAL_18]]
-// CHECK:         %[[VAL_24:.*]] = load i32, ptr %[[VAL_22]], align 4
-// CHECK:         store i32 %[[VAL_24]], ptr %[[VAL_20]], align 4
+// CHECK:         %[[VAL_24:.*]] = load i32, ptr %[[VAL_20]], align 4
+// CHECK:         store i32 %[[VAL_24]], ptr %[[OMP_PRIVATE_ALLOC]], align 4
 // CHECK:         br label %[[VAL_25:.*]]
 // CHECK:       omp.region.cont:                                  ; preds = %[[VAL_23]]
 // CHECK:         tail call void @free(ptr %[[OMP_TASK_CONTEXT_PTR]])

>From 3f02adf573612ce884f1c3a964aafee9381c9afa Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 5 Feb 2025 09:39:33 +0000
Subject: [PATCH 03/11] Add methods to operation to test if the mold arg is
 read

---
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp |  2 +-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 15 ++++++++++++--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 20 +++----------------
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index d725dfd3e94f3..6b0d783fb8c30 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -594,7 +594,7 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
           sym, cannotHaveNonDefaultLowerBounds);
       // TODO: currently there are false positives from dead uses of the mold
       // arg
-      if (!result.getInitMoldArg().getUses().empty())
+      if (result.initReadsFromMold())
         mightHaveReadHostSym.insert(sym);
     }
 
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 2d8e022190f62..ecff73f44b89a 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -146,13 +146,24 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>
       return region.empty() ? nullptr : region.getArgument(0);
     }
 
+    /// Returns true if the init region might read from the mold argument
+    bool initReadsFromMold() {
+      BlockArgument moldArg = getInitMoldArg();
+      return moldArg ? !moldArg.use_empty() : false;
+    }
+
+    /// Returns true if any region of this privatizer might read from the mold
+    /// argument
+    bool readsFromMold() {
+      return initReadsFromMold() || !getCopyRegion().empty();
+    }
+
     /// needsMap returns true if the value being privatized should additionally
     /// be mapped to the target region using a MapInfoOp. This is most common
     /// when an allocatable is privatized. In such cases, the descriptor is used
     /// in privatization and needs to be mapped on to the device.
     bool needsMap() {
-      BlockArgument moldArg = getInitMoldArg();
-      return moldArg ? !moldArg.use_empty() : false;
+      return initReadsFromMold();
     }
 
     /// Get the type for arguments to nested regions. This should
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index bbe4d58270305..c4e5514ac9ab4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1755,20 +1755,6 @@ buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
   }
 }
 
-static bool privatizerReadsSourceVariable(omp::PrivateClauseOp &priv) {
-  if (priv.getDataSharingType() == omp::DataSharingClauseType::FirstPrivate)
-    return true;
-
-  Region &initRegion = priv.getInitRegion();
-  if (initRegion.empty())
-    return false;
-
-  BlockArgument sourceVariable = priv.getInitMoldArg();
-  if (!sourceVariable)
-    return false;
-  return !sourceVariable.use_empty();
-}
-
 namespace {
 /// TaskContextStructManager takes care of creating and freeing a structure
 /// containing information needed by the task body to execute.
@@ -1821,7 +1807,7 @@ void TaskContextStructManager::generateTaskContextStruct() {
   for (omp::PrivateClauseOp &privOp : privateDecls) {
     // Skip private variables which can safely be allocated and initialised
     // inside of the task
-    if (!privatizerReadsSourceVariable(privOp))
+    if (!privOp.readsFromMold())
       continue;
     Type mlirType = privOp.getType();
     privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
@@ -1853,7 +1839,7 @@ void TaskContextStructManager::createGEPsToPrivateVars(
   llvm::Value *zero = builder.getInt32(0);
   unsigned i = 0;
   for (auto privDecl : privateDecls) {
-    if (!privatizerReadsSourceVariable(privDecl)) {
+    if (!privDecl.readsFromMold()) {
       // Handle this inside of the task so we don't pass unnessecary vars in
       llvmPrivateVars.push_back(nullptr);
       continue;
@@ -2008,7 +1994,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
     llvm::BasicBlock *privInitBlock = nullptr;
     for (auto [blockArg, privDecl, mlirPrivVar] :
          llvm::zip_equal(privateBlockArgs, privateDecls, mlirPrivateVars)) {
-      if (privatizerReadsSourceVariable(privDecl))
+      if (privDecl.readsFromMold())
         // This is handled before the task executes
         continue;
 

>From 08573d23cd3f7a498f167acd0f3e70d5df6cb947 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 5 Feb 2025 09:39:59 +0000
Subject: [PATCH 04/11] Fix comment

---
 .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp    | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index c4e5514ac9ab4..0fce82c03cfda 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1767,10 +1767,9 @@ class TaskContextStructManager {
         privateDecls{privateDecls} {}
 
   /// Creates a heap allocated struct containing space for each private
-  /// variable. Returns nullptr if there are is no struct needed. Invariant:
-  /// privateVarTypes, privateDecls, and the elements of the structure should
-  /// all have the same order (although privateDecls which do not read from the
-  /// mold argument are skipped).
+  /// variable. Invariant: privateVarTypes, privateDecls, and the elements of
+  /// the structure should all have the same order (although privateDecls which
+  /// do not read from the mold argument are skipped).
   void generateTaskContextStruct();
 
   /// Create GEPs to access each member of the structure representing a private

>From a8ecb618de49346e3090b4fc7b56740686cfe87f Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 5 Feb 2025 10:00:29 +0000
Subject: [PATCH 05/11] Make logic clearer

---
 .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp       | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 0fce82c03cfda..42d6fe6046861 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1942,9 +1942,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
        llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
                        privateVarAllocations)) {
-    if (!llvmPrivateVarAlloc)
+    if (!privDecl.readsFromMold())
       // to be handled inside the task
       continue;
+    assert(llvmPrivateVarAlloc &&
+           "reads from mold so shouldn't have been skipped");
 
     llvm::Expected<llvm::Value *> privateVarOrErr =
         initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,

>From 8ac248a112211467e2da48896a4956be5b867acc Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 5 Feb 2025 15:48:58 +0000
Subject: [PATCH 06/11] Move llvm private vars into task context struct manager

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 28 +++++++++++++------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 42d6fe6046861..895791201ab68 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1458,7 +1458,7 @@ static LogicalResult
 copyFirstPrivateVars(llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation,
                      SmallVectorImpl<mlir::Value> &mlirPrivateVars,
-                     SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
+                     ArrayRef<llvm::Value *> llvmPrivateVars,
                      SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
   // Apply copy region for firstprivate.
   bool needsFirstprivate =
@@ -1776,11 +1776,15 @@ class TaskContextStructManager {
   /// variable, adding them to llvmPrivateVars. Null values are added where
   /// private decls were skipped so that the ordering continues to match the
   /// private decls.
-  void createGEPsToPrivateVars(SmallVectorImpl<llvm::Value *> &llvmPrivateVars);
+  void createGEPsToPrivateVars();
 
   /// De-allocate the task context structure.
   void freeStructPtr();
 
+  MutableArrayRef<llvm::Value *> getLLVMPrivateVars() {
+    return llvmPrivateVars;
+  }
+
   llvm::Value *getStructPtr() { return structPtr; }
 
 private:
@@ -1791,6 +1795,10 @@ class TaskContextStructManager {
   /// The type of each member of the structure, in order.
   SmallVector<llvm::Type *> privateVarTypes;
 
+  /// LLVM values for each private variable, or null if that private variable is
+  /// not included in the task context structure
+  SmallVector<llvm::Value *> llvmPrivateVars;
+
   /// A pointer to the structure containing context for this task.
   llvm::Value *structPtr = nullptr;
   /// The type of the structure
@@ -1826,14 +1834,14 @@ void TaskContextStructManager::generateTaskContextStruct() {
                                    "omp.task.context_ptr");
 }
 
-void TaskContextStructManager::createGEPsToPrivateVars(
-    SmallVectorImpl<llvm::Value *> &llvmPrivateVars) {
+void TaskContextStructManager::createGEPsToPrivateVars() {
   if (!structPtr) {
     assert(privateVarTypes.empty());
     return;
   }
 
   // Create GEPs for each struct member and initialize llvmPrivateVars to point
+  llvmPrivateVars.clear();
   llvmPrivateVars.reserve(privateVarTypes.size());
   llvm::Value *zero = builder.getInt32(0);
   unsigned i = 0;
@@ -1929,7 +1937,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   builder.SetInsertPoint(initBlock->getTerminator());
 
   // Create task variable structure
-  llvm::SmallVector<llvm::Value *> privateVarAllocations;
   taskStructMgr.generateTaskContextStruct();
   // GEPs so that we can initialize the variables. Don't use these GEPs inside
   // of the body otherwise it will be the GEP not the struct which is fowarded
@@ -1937,11 +1944,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks
   // which may not be executed until after the current stack frame goes out of
   // scope.
-  taskStructMgr.createGEPsToPrivateVars(privateVarAllocations);
+  taskStructMgr.createGEPsToPrivateVars();
 
   for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
        llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
-                       privateVarAllocations)) {
+                       taskStructMgr.getLLVMPrivateVars())) {
     if (!privDecl.readsFromMold())
       // to be handled inside the task
       continue;
@@ -1976,7 +1983,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   // firstprivate copy region
   setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
   if (failed(copyFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                  privateVarAllocations, privateDecls)))
+                                  taskStructMgr.getLLVMPrivateVars(),
+                                  privateDecls)))
     return llvm::failure();
 
   // Set up for call to createTask()
@@ -2017,7 +2025,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
 
     // Find and map the addresses of each variable within the task context
     // structure
-    taskStructMgr.createGEPsToPrivateVars(llvmPrivateVars);
+    taskStructMgr.createGEPsToPrivateVars();
+    llvm::copy(taskStructMgr.getLLVMPrivateVars(),
+               std::back_inserter(llvmPrivateVars));
     for (auto [blockArg, llvmPrivateVar] :
          llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {
       if (!llvmPrivateVar)

>From fbba1f3011fbd1f44189b7adcccd133cd5ed6250 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 6 Feb 2025 09:57:01 +0000
Subject: [PATCH 07/11] Fix comment

---
 .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 895791201ab68..28d94da7f966f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1840,7 +1840,7 @@ void TaskContextStructManager::createGEPsToPrivateVars() {
     return;
   }
 
-  // Create GEPs for each struct member and initialize llvmPrivateVars to point
+  // Create GEPs for each struct member
   llvmPrivateVars.clear();
   llvmPrivateVars.reserve(privateVarTypes.size());
   llvm::Value *zero = builder.getInt32(0);

>From 4e9cd7b44da1e89830e3dba1c6e953b43a91e780 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 6 Feb 2025 09:57:39 +0000
Subject: [PATCH 08/11] Reserve the correct size

---
 .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 28d94da7f966f..23ed2aa0dfff6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1842,7 +1842,7 @@ void TaskContextStructManager::createGEPsToPrivateVars() {
 
   // Create GEPs for each struct member
   llvmPrivateVars.clear();
-  llvmPrivateVars.reserve(privateVarTypes.size());
+  llvmPrivateVars.reserve(privateDecls.size());
   llvm::Value *zero = builder.getInt32(0);
   unsigned i = 0;
   for (auto privDecl : privateDecls) {

>From ffec9f4903a5967c2da6cf76110af7136f8e9b41 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 6 Feb 2025 10:02:08 +0000
Subject: [PATCH 09/11] Update charbox comment

---
 .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp     | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 23ed2aa0dfff6..ef2d898e3852a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1964,7 +1964,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
     llvm::IRBuilderBase::InsertPointGuard guard(builder);
     builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
 
-    // TODO: this is a bit of a hack for Fortran character boxes
+    // TODO: this is a bit of a hack for Fortran character boxes.
+    // Character boxes are passed by value into the init region and then the
+    // initialized character box is yielded by value. Here we need to store the
+    // yielded value into the private allocation, and load the private
+    // allocation to match the type expected by region block arguments.
     if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
         !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
       builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);

>From bb0e213b21ba7700e0b1bcfa30cccffdc52da321 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 6 Feb 2025 10:03:27 +0000
Subject: [PATCH 10/11] Rename llvmPrivateVars to llvmPrivateVarGEPs

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index ef2d898e3852a..302490de79a70 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1781,8 +1781,8 @@ class TaskContextStructManager {
   /// De-allocate the task context structure.
   void freeStructPtr();
 
-  MutableArrayRef<llvm::Value *> getLLVMPrivateVars() {
-    return llvmPrivateVars;
+  MutableArrayRef<llvm::Value *> getLLVMPrivateVarGEPs() {
+    return llvmPrivateVarGEPs;
   }
 
   llvm::Value *getStructPtr() { return structPtr; }
@@ -1797,7 +1797,7 @@ class TaskContextStructManager {
 
   /// LLVM values for each private variable, or null if that private variable is
   /// not included in the task context structure
-  SmallVector<llvm::Value *> llvmPrivateVars;
+  SmallVector<llvm::Value *> llvmPrivateVarGEPs;
 
   /// A pointer to the structure containing context for this task.
   llvm::Value *structPtr = nullptr;
@@ -1841,19 +1841,19 @@ void TaskContextStructManager::createGEPsToPrivateVars() {
   }
 
   // Create GEPs for each struct member
-  llvmPrivateVars.clear();
-  llvmPrivateVars.reserve(privateDecls.size());
+  llvmPrivateVarGEPs.clear();
+  llvmPrivateVarGEPs.reserve(privateDecls.size());
   llvm::Value *zero = builder.getInt32(0);
   unsigned i = 0;
   for (auto privDecl : privateDecls) {
     if (!privDecl.readsFromMold()) {
       // Handle this inside of the task so we don't pass unnessecary vars in
-      llvmPrivateVars.push_back(nullptr);
+      llvmPrivateVarGEPs.push_back(nullptr);
       continue;
     }
     llvm::Value *iVal = builder.getInt32(i);
     llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
-    llvmPrivateVars.push_back(gep);
+    llvmPrivateVarGEPs.push_back(gep);
     i += 1;
   }
 }
@@ -1948,7 +1948,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
 
   for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
        llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
-                       taskStructMgr.getLLVMPrivateVars())) {
+                       taskStructMgr.getLLVMPrivateVarGEPs())) {
     if (!privDecl.readsFromMold())
       // to be handled inside the task
       continue;
@@ -1987,7 +1987,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   // firstprivate copy region
   setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
   if (failed(copyFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                  taskStructMgr.getLLVMPrivateVars(),
+                                  taskStructMgr.getLLVMPrivateVarGEPs(),
                                   privateDecls)))
     return llvm::failure();
 
@@ -2030,7 +2030,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
     // Find and map the addresses of each variable within the task context
     // structure
     taskStructMgr.createGEPsToPrivateVars();
-    llvm::copy(taskStructMgr.getLLVMPrivateVars(),
+    llvm::copy(taskStructMgr.getLLVMPrivateVarGEPs(),
                std::back_inserter(llvmPrivateVars));
     for (auto [blockArg, llvmPrivateVar] :
          llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {

>From 459d2d5ea7f803cc6553b7d9007283281263f1fe Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 24 Feb 2025 11:21:43 +0000
Subject: [PATCH 11/11] Fix skatrak's comments

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  2 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 40 +++++++++++--------
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index ecff73f44b89a..f5a8a7ba04375 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -149,7 +149,7 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>
     /// Returns true if the init region might read from the mold argument
     bool initReadsFromMold() {
       BlockArgument moldArg = getInitMoldArg();
-      return moldArg ? !moldArg.use_empty() : false;
+      return moldArg && !moldArg.use_empty();
     }
 
     /// Returns true if any region of this privatizer might read from the mold
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 302490de79a70..b163332c2f731 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1347,9 +1347,8 @@ static llvm::Expected<llvm::Value *> initPrivateVar(
     llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock,
     llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
   Region &initRegion = privDecl.getInitRegion();
-  if (initRegion.empty()) {
+  if (initRegion.empty())
     return llvmPrivateVar;
-  }
 
   // map initialization region block arguments
   llvm::Value *nonPrivateVar = findAssociatedValue(
@@ -1399,8 +1398,8 @@ initPrivateVars(llvm::IRBuilderBase &builder,
         builder, moduleTranslation, privDecl, mlirPrivVar, blockArg,
         llvmPrivateVar, privInitBlock, mappedPrivateVars);
 
-    if (auto err = privVarOrErr.takeError())
-      return err;
+    if (!privVarOrErr)
+      return privVarOrErr.takeError();
 
     llvmPrivateVar = privVarOrErr.get();
     moduleTranslation.mapValue(blockArg, llvmPrivateVar);
@@ -2005,13 +2004,15 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
     builder.restoreIP(codegenIP);
 
     llvm::BasicBlock *privInitBlock = nullptr;
-    for (auto [blockArg, privDecl, mlirPrivVar] :
-         llvm::zip_equal(privateBlockArgs, privateDecls, mlirPrivateVars)) {
+    llvmPrivateVars.resize(privateBlockArgs.size());
+    for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
+             privateBlockArgs, privateDecls, mlirPrivateVars))) {
+      auto [blockArg, privDecl, mlirPrivVar] = zip;
+      // This is handled before the task executes
       if (privDecl.readsFromMold())
-        // This is handled before the task executes
         continue;
 
-      auto codegenInsertionPt = builder.saveIP();
+      llvm::IRBuilderBase::InsertPointGuard guard(builder);
       llvm::Type *llvmAllocType =
           moduleTranslation.convertType(privDecl.getType());
       builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
@@ -2024,18 +2025,25 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
       if (auto err = privateVarOrError.takeError())
         return err;
       moduleTranslation.mapValue(blockArg, privateVarOrError.get());
-      builder.restoreIP(codegenInsertionPt);
+      llvmPrivateVars[i] = privateVarOrError.get();
+    }
+
+    taskStructMgr.createGEPsToPrivateVars();
+    for (auto [i, llvmPrivVar] :
+         llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
+      if (!llvmPrivVar) {
+        assert(llvmPrivateVars[i] && "This is added in the loop above");
+        continue;
+      }
+      llvmPrivateVars[i] = llvmPrivVar;
     }
 
     // Find and map the addresses of each variable within the task context
     // structure
-    taskStructMgr.createGEPsToPrivateVars();
-    llvm::copy(taskStructMgr.getLLVMPrivateVarGEPs(),
-               std::back_inserter(llvmPrivateVars));
-    for (auto [blockArg, llvmPrivateVar] :
-         llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {
-      if (!llvmPrivateVar)
-        // This was handled above
+    for (auto [blockArg, llvmPrivateVar, privateDecl] :
+         llvm::zip_equal(privateBlockArgs, llvmPrivateVars, privateDecls)) {
+      // This was handled above.
+      if (!privateDecl.readsFromMold())
         continue;
       // Fix broken pass-by-value case for Fortran character boxes
       if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {



More information about the llvm-branch-commits mailing list