[llvm-branch-commits] [mlir] [mlir][OpenMP] initialize (first)private variables before task exec (PR #125304)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jan 31 13:53:33 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
@llvm/pr-subscribers-mlir-openmp
Author: Tom Eccles (tblah)
<details>
<summary>Changes</summary>
This still doesn't fix the memory safety issues because the stack allocations created here for the private variables might go out of scope.
I will add a more complete lit test later in this patch series.
---
Full diff: https://github.com/llvm/llvm-project/pull/125304.diff
2 Files Affected:
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+71-18)
- (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+25-23)
``````````diff
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index ea044fe0c8c196..8a9a69cefad8ee 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1750,25 +1750,80 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
for (mlir::Value privateVar : taskOp.getPrivateVars())
mlirPrivateVars.push_back(privateVar);
- auto bodyCB = [&](InsertPointTy allocaIP,
- InsertPointTy codegenIP) -> llvm::Error {
- // Save the alloca insertion point on ModuleTranslation stack for use in
- // nested regions.
- LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
- moduleTranslation, allocaIP);
+ // Allocate and copy private variables before creating the task. This avoids
+ // accessing invalid memory if (after this scope ends) the private variables
+ // are initialized from host variables or if the variables are copied into
+ // from host variables (firstprivate). The insertion point is just before
+ // where the code for creating and scheduling the task will go. That puts this
+ // code outside of the outlined task region, which is what we want because
+ // this way the initialization and copy regions are executed immediately while
+ // the host variable data are still live.
- llvm::Expected<llvm::BasicBlock *> afterAllocas =
- allocateAndInitPrivateVars(builder, moduleTranslation, privateBlockArgs,
- privateDecls, mlirPrivateVars,
- llvmPrivateVars, allocaIP);
- if (handleError(afterAllocas, *taskOp).failed())
- return llvm::make_error<PreviouslyReportedError>();
+ llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+ findAllocaInsertPoint(builder, moduleTranslation);
- if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
- llvmPrivateVars, privateDecls,
- afterAllocas.get())))
- return llvm::make_error<PreviouslyReportedError>();
+ // Not using splitBB() because that requires the current block to have a
+ // terminator.
+ assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
+ llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
+ builder.getContext(), "omp.task.start",
+ /*Parent=*/builder.GetInsertBlock()->getParent());
+ llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
+ builder.SetInsertPoint(branchToTaskStartBlock);
+
+ // Now do this again to make the initialization and copy blocks
+ llvm::BasicBlock *copyBlock =
+ splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+ llvm::BasicBlock *initBlock =
+ splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
+
+ // Now the control flow graph should look like
+ // starter_block:
+ // <---- where we started when convertOmpTaskOp was called
+ // br %omp.private.init
+ // omp.private.init:
+ // br %omp.private.copy
+ // omp.private.copy:
+ // br %omp.task.start
+ // omp.task.start:
+ // <---- where we want the insertion point to be when we call createTask()
+
+ // Save the alloca insertion point on ModuleTranslation stack for use in
+ // nested regions.
+ LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+ moduleTranslation, allocaIP);
+
+ // Allocate and initialize private variables
+ // TODO: package private variables up in a structure
+ builder.SetInsertPoint(initBlock->getTerminator());
+ for (auto [privDecl, mlirPrivVar, blockArg] :
+ llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
+ llvm::Type *llvmAllocType =
+ moduleTranslation.convertType(privDecl.getType());
+ // Allocations:
+ builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+ llvm::Value *llvmPrivateVar = builder.CreateAlloca(
+ llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
+
+ // builder.SetInsertPoint(initBlock->getTerminator());
+ auto err =
+ initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+ blockArg, llvmPrivateVar, llvmPrivateVars, initBlock);
+ if (err)
+ return handleError(std::move(err), *taskOp.getOperation());
+ }
+
+ // firstprivate copy region
+ if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+ llvmPrivateVars, privateDecls, copyBlock)))
+ return llvm::failure();
+
+ // Set up for call to createTask()
+ builder.SetInsertPoint(taskStartBlock);
+
+ auto bodyCB = [&](InsertPointTy allocaIP,
+ InsertPointTy codegenIP) -> llvm::Error {
// translate the body of the task:
builder.restoreIP(codegenIP);
auto continuationBlockOrError = convertOmpOpRegions(
@@ -1789,8 +1844,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
moduleTranslation, dds);
- llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
moduleTranslation.getOpenMPBuilder()->createTask(
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 8a95793b96fd53..b4f0dfc46471a5 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -2790,11 +2790,14 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) {
}
// CHECK-LABEL: @par_task_
+// CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8
// CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @[[task_outlined_fn:.+]])
// CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]])
-// CHECK: define internal void @[[task_outlined_fn]]
-// CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8
-// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[ARG_ALLOC]])
+// CHECK: define internal void @[[task_outlined_fn]](i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[STRUCT_ARG:.*]])
+// CHECK: %[[LOADED_STRUCT_PTR:.*]] = load ptr, ptr %[[STRUCT_ARG]], align 8
+// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr, ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
+// CHECK: %[[LOADGEP_STRUCTARG:.*]] = load ptr, ptr %[[GEP_STRUCTARG]], align 8
+// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[LOADGEP_STRUCTARG]])
// CHECK: define internal void @[[parallel_outlined_fn]]
// -----
@@ -2820,27 +2823,18 @@ llvm.func @task(%arg0 : !llvm.ptr) {
llvm.return
}
// CHECK-LABEL: @task..omp_par
-// CHECK: task.alloca:
-// CHECK: %[[VAL_11:.*]] = load ptr, ptr %[[VAL_12:.*]], align 8
-// CHECK: %[[VAL_13:.*]] = getelementptr { ptr }, ptr %[[VAL_11]], i32 0, i32 0
+// CHECK: task.alloca:
+// CHECK: %[[VAL_12:.*]] = load ptr, ptr %[[STRUCT_ARG:.*]], align 8
+// CHECK: %[[VAL_13:.*]] = getelementptr { ptr }, ptr %[[VAL_12]], i32 0, i32 0
// CHECK: %[[VAL_14:.*]] = load ptr, ptr %[[VAL_13]], align 8
-// CHECK: %[[VAL_15:.*]] = alloca i32, align 4
-// CHECK: br label %omp.private.init
-// CHECK: omp.private.init: ; preds = %task.alloca
-// CHECK: br label %omp.private.copy
-// CHECK: omp.private.copy: ; preds = %omp.private.init
-// CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_14]], align 4
-// CHECK: store i32 %[[VAL_19]], ptr %[[VAL_15]], align 4
-// CHECK: br label %[[VAL_20:.*]]
-// CHECK: [[VAL_20]]:
// CHECK: br label %task.body
-// CHECK: task.body: ; preds = %[[VAL_20]]
+// CHECK: task.body: ; preds = %task.alloca
// CHECK: br label %omp.task.region
// CHECK: omp.task.region: ; preds = %task.body
-// CHECK: call void @foo(ptr %[[VAL_15]])
+// CHECK: call void @foo(ptr %[[VAL_14]])
// CHECK: br label %omp.region.cont
// CHECK: omp.region.cont: ; preds = %omp.task.region
-// CHECK: call void @destroy(ptr %[[VAL_15]])
+// CHECK: call void @destroy(ptr %[[VAL_14]])
// CHECK: br label %task.exit.exitStub
// CHECK: task.exit.exitStub: ; preds = %omp.region.cont
// CHECK: ret void
@@ -2910,6 +2904,19 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) {
// CHECK: br label %[[omp_region_cont:[^,]+]]
// CHECK: [[omp_taskgroup_region]]:
// CHECK: %{{.+}} = alloca i8, align 1
+// CHECK: br label %[[omp_private_init:[^,]+]]
+// CHECK: [[omp_private_init]]:
+// CHECK: br label %[[omp_private_copy:[^,]+]]
+// CHECK: [[omp_private_copy]]:
+// CHECK: br label %[[omp_task_start:[^,]+]]
+
+// CHECK: [[omp_region_cont:[^,]+]]:
+// CHECK: br label %[[taskgroup_exit:[^,]+]]
+// CHECK: [[taskgroup_exit]]:
+// CHECK: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]])
+// CHECK: ret void
+
+// CHECK: [[omp_task_start]]:
// CHECK: br label %[[codeRepl:[^,]+]]
// CHECK: [[codeRepl]]:
// CHECK: %[[omp_global_thread_num_t1:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}})
@@ -2933,11 +2940,6 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) {
// CHECK: br label %[[task_exit3:[^,]+]]
// CHECK: [[task_exit3]]:
// CHECK: br label %[[omp_taskgroup_region1]]
-// CHECK: [[omp_region_cont]]:
-// CHECK: br label %[[taskgroup_exit:[^,]+]]
-// CHECK: [[taskgroup_exit]]:
-// CHECK: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]])
-// CHECK: ret void
// CHECK: }
// -----
``````````
</details>
https://github.com/llvm/llvm-project/pull/125304
More information about the llvm-branch-commits
mailing list