[llvm-branch-commits] [mlir] [mlir][OpenMP] initialize (first)private variables before task exec (PR #125304)

Tom Eccles via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Jan 31 13:52:25 PST 2025


https://github.com/tblah created https://github.com/llvm/llvm-project/pull/125304

This still doesn't fix the memory safety issues because the stack allocations created here for the private variables might go out of scope.

I will add a more complete lit test later in this patch series.

>From c5f10057ff564a4fdc64673f73474ae20f2ea574 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Fri, 24 Jan 2025 16:00:53 +0000
Subject: [PATCH] [mlir][OpenMP] initialize (first)private variables before
 task exec

This still doesn't fix the memory safety issues because the stack
allocations created here for the private variables might go out of
scope.

I will add a more complete lit test later in this patch series.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 89 +++++++++++++++----
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      | 48 +++++-----
 2 files changed, 96 insertions(+), 41 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index ea044fe0c8c196c..8a9a69cefad8ee1 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1750,25 +1750,80 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   for (mlir::Value privateVar : taskOp.getPrivateVars())
     mlirPrivateVars.push_back(privateVar);
 
-  auto bodyCB = [&](InsertPointTy allocaIP,
-                    InsertPointTy codegenIP) -> llvm::Error {
-    // Save the alloca insertion point on ModuleTranslation stack for use in
-    // nested regions.
-    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
-        moduleTranslation, allocaIP);
+  // Allocate and copy private variables before creating the task. This avoids
+  // accessing invalid memory if (after this scope ends) the private variables
+  // are initialized from host variables or if the variables are copied into
+  // from host variables (firstprivate). The insertion point is just before
+  // where the code for creating and scheduling the task will go. That puts this
+  // code outside of the outlined task region, which is what we want because
+  // this way the initialization and copy regions are executed immediately while
+  // the host variable data are still live.
 
-    llvm::Expected<llvm::BasicBlock *> afterAllocas =
-        allocateAndInitPrivateVars(builder, moduleTranslation, privateBlockArgs,
-                                   privateDecls, mlirPrivateVars,
-                                   llvmPrivateVars, allocaIP);
-    if (handleError(afterAllocas, *taskOp).failed())
-      return llvm::make_error<PreviouslyReportedError>();
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
 
-    if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                    llvmPrivateVars, privateDecls,
-                                    afterAllocas.get())))
-      return llvm::make_error<PreviouslyReportedError>();
+  // Not using splitBB() because that requires the current block to have a
+  // terminator.
+  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
+  llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
+      builder.getContext(), "omp.task.start",
+      /*Parent=*/builder.GetInsertBlock()->getParent());
+  llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
+  builder.SetInsertPoint(branchToTaskStartBlock);
+
+  // Now do this again to make the initialization and copy blocks
+  llvm::BasicBlock *copyBlock =
+      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+  llvm::BasicBlock *initBlock =
+      splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
+
+  // Now the control flow graph should look like
+  // starter_block:
+  //   <---- where we started when convertOmpTaskOp was called
+  //   br %omp.private.init
+  // omp.private.init:
+  //   br %omp.private.copy
+  // omp.private.copy:
+  //   br %omp.task.start
+  // omp.task.start:
+  //   <---- where we want the insertion point to be when we call createTask()
+
+  // Save the alloca insertion point on ModuleTranslation stack for use in
+  // nested regions.
+  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+      moduleTranslation, allocaIP);
+
+  // Allocate and initialize private variables
+  // TODO: package private variables up in a structure
+  builder.SetInsertPoint(initBlock->getTerminator());
+  for (auto [privDecl, mlirPrivVar, blockArg] :
+       llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
+    llvm::Type *llvmAllocType =
+        moduleTranslation.convertType(privDecl.getType());
 
+    // Allocations:
+    builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+    llvm::Value *llvmPrivateVar = builder.CreateAlloca(
+        llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
+
+    // builder.SetInsertPoint(initBlock->getTerminator());
+    auto err =
+        initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                       blockArg, llvmPrivateVar, llvmPrivateVars, initBlock);
+    if (err)
+      return handleError(std::move(err), *taskOp.getOperation());
+  }
+
+  // firstprivate copy region
+  if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+                                  llvmPrivateVars, privateDecls, copyBlock)))
+    return llvm::failure();
+
+  // Set up for call to createTask()
+  builder.SetInsertPoint(taskStartBlock);
+
+  auto bodyCB = [&](InsertPointTy allocaIP,
+                    InsertPointTy codegenIP) -> llvm::Error {
     // translate the body of the task:
     builder.restoreIP(codegenIP);
     auto continuationBlockOrError = convertOmpOpRegions(
@@ -1789,8 +1844,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
                   moduleTranslation, dds);
 
-  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
-      findAllocaInsertPoint(builder, moduleTranslation);
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createTask(
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 8a95793b96fd531..b4f0dfc46471a53 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -2790,11 +2790,14 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) {
 }
 
 // CHECK-LABEL: @par_task_
+// CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8
 // CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @[[task_outlined_fn:.+]])
 // CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]])
-// CHECK: define internal void @[[task_outlined_fn]]
-// CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8
-// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[ARG_ALLOC]])
+// CHECK: define internal void @[[task_outlined_fn]](i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[STRUCT_ARG:.*]])
+// CHECK: %[[LOADED_STRUCT_PTR:.*]] = load ptr, ptr %[[STRUCT_ARG]], align 8
+// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr, ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
+// CHECK: %[[LOADGEP_STRUCTARG:.*]] = load ptr, ptr %[[GEP_STRUCTARG]], align 8
+// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[LOADGEP_STRUCTARG]])
 // CHECK: define internal void @[[parallel_outlined_fn]]
 // -----
 
@@ -2820,27 +2823,18 @@ llvm.func @task(%arg0 : !llvm.ptr) {
   llvm.return
 }
 // CHECK-LABEL: @task..omp_par
-// CHECK:       task.alloca:
-// CHECK:         %[[VAL_11:.*]] = load ptr, ptr %[[VAL_12:.*]], align 8
-// CHECK:         %[[VAL_13:.*]] = getelementptr { ptr }, ptr %[[VAL_11]], i32 0, i32 0
+// CHECK:      task.alloca:
+// CHECK:         %[[VAL_12:.*]] = load ptr, ptr %[[STRUCT_ARG:.*]], align 8
+// CHECK:         %[[VAL_13:.*]] = getelementptr { ptr }, ptr %[[VAL_12]], i32 0, i32 0
 // CHECK:         %[[VAL_14:.*]] = load ptr, ptr %[[VAL_13]], align 8
-// CHECK:         %[[VAL_15:.*]] = alloca i32, align 4
-// CHECK:         br label %omp.private.init
-// CHECK:       omp.private.init:                                 ; preds = %task.alloca
-// CHECK:         br label %omp.private.copy
-// CHECK:       omp.private.copy:                                 ; preds = %omp.private.init
-// CHECK:         %[[VAL_19:.*]] = load i32, ptr %[[VAL_14]], align 4
-// CHECK:         store i32 %[[VAL_19]], ptr %[[VAL_15]], align 4
-// CHECK:         br label %[[VAL_20:.*]]
-// CHECK:       [[VAL_20]]:
 // CHECK:         br label %task.body
-// CHECK:       task.body:                                        ; preds = %[[VAL_20]]
+// CHECK:       task.body:                                        ; preds = %task.alloca
 // CHECK:         br label %omp.task.region
 // CHECK:       omp.task.region:                                  ; preds = %task.body
-// CHECK:         call void @foo(ptr %[[VAL_15]])
+// CHECK:         call void @foo(ptr %[[VAL_14]])
 // CHECK:         br label %omp.region.cont
 // CHECK:       omp.region.cont:                                  ; preds = %omp.task.region
-// CHECK:         call void @destroy(ptr %[[VAL_15]])
+// CHECK:         call void @destroy(ptr %[[VAL_14]])
 // CHECK:         br label %task.exit.exitStub
 // CHECK:       task.exit.exitStub:                               ; preds = %omp.region.cont
 // CHECK:         ret void
@@ -2910,6 +2904,19 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) {
 // CHECK:         br label %[[omp_region_cont:[^,]+]]
 // CHECK:       [[omp_taskgroup_region]]:
 // CHECK:         %{{.+}} = alloca i8, align 1
+// CHECK:         br label %[[omp_private_init:[^,]+]]
+// CHECK:       [[omp_private_init]]:
+// CHECK:         br label %[[omp_private_copy:[^,]+]]
+// CHECK:       [[omp_private_copy]]:
+// CHECK:         br label %[[omp_task_start:[^,]+]]
+
+// CHECK:       [[omp_region_cont:[^,]+]]:
+// CHECK:         br label %[[taskgroup_exit:[^,]+]]
+// CHECK:       [[taskgroup_exit]]:
+// CHECK:         call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]])
+// CHECK:         ret void
+
+// CHECK:       [[omp_task_start]]:
 // CHECK:         br label %[[codeRepl:[^,]+]]
 // CHECK:       [[codeRepl]]:
 // CHECK:         %[[omp_global_thread_num_t1:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}})
@@ -2933,11 +2940,6 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) {
 // CHECK:         br label %[[task_exit3:[^,]+]]
 // CHECK:       [[task_exit3]]:
 // CHECK:         br label %[[omp_taskgroup_region1]]
-// CHECK:       [[omp_region_cont]]:
-// CHECK:         br label %[[taskgroup_exit:[^,]+]]
-// CHECK:       [[taskgroup_exit]]:
-// CHECK:         call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]])
-// CHECK:         ret void
 // CHECK:       }
 
 // -----



More information about the llvm-branch-commits mailing list