[llvm-branch-commits] [llvm] [mlir] [MLIR][OpenMP] Add Initial Taskloop Clause Support (PR #174623)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jan 6 09:10:05 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Jack Styles (Stylie777)
<details>
<summary>Changes</summary>
Following on from the work to implement MLIR -> LLVM IR Translation for Taskloop, this adds support for the following clauses to be used alongside taskloop:
- if
- grainsize
- num_tasks
- untied
- nogroup
- final
- mergeable
- priority
These clauses are handled directly by the relevant OpenMP runtime functions, so their information only needed to be collected from the relevant location and passed through to the appropriate runtime function.
The remaining clauses retain their TODO messages, as they have not yet been implemented.
---
Patch is 60.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174623.diff
14 Files Affected:
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+20-3)
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+41-17)
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+20-33)
- (added) mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir (+88)
- (added) mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir (+88)
- (added) mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir (+93)
- (added) mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir (+87)
- (modified) mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir (+1-1)
- (added) mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir (+87)
- (added) mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir (+88)
- (added) mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir (+88)
- (added) mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir (+161)
- (modified) mlir/test/Target/LLVMIR/openmp-taskloop.mlir (+1-1)
- (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (-104)
``````````diff
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 7ba7a05ac71a3..a0b26963353b1 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1464,7 +1464,22 @@ class OpenMPIRBuilder {
/// \param LBVal Lowerbound value of loop
/// \param UBVal Upperbound value of loop
/// \param StepVal Step value of loop
- /// \param Tied True if the task is tied, false if the task is untied.
+ /// \param Untied True if the task is untied, false if the task is tied.
+ /// \param IfCond i1 value. If it evaluates to `false`, an undeferred
+ /// task is generated, and the encountering thread must
+ /// suspend the current task region, for which execution
+ /// cannot be resumed until execution of the structured
+ /// block that is associated with the generated task is
+ /// completed.
+ /// \param GrainSize Value of the GrainSize/Num of Tasks if present
+ /// \param NoGroup True if the nogroup clause is present, false if not
+ /// \param Sched 1 if grainsize is defined, 2 if num_tasks is defined,
+ /// otherwise 0
+ /// \param Final i1 value which is `true` if the task is final, `false` if the
+ /// task is not final.
+ /// \param Mergeable If the given task is `mergeable`
+ /// \param Priority `priority-value` specifies the execution order of the
+ /// tasks that are generated by the construct
/// \param DupCB The callback to generate the duplication code. See
/// documentation for \ref TaskDupCallbackTy. This can be nullptr.
/// \param TaskContextStructPtrVal If non-null, a pointer to to be placed
@@ -1474,8 +1489,10 @@ class OpenMPIRBuilder {
const LocationDescription &Loc, InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB,
llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
- Value *LBVal, Value *UBVal, Value *StepVal, bool Tied = true,
- TaskDupCallbackTy DupCB = nullptr,
+ Value *LBVal, Value *UBVal, Value *StepVal, bool Untied = false,
+ Value *IfCond = nullptr, Value *GrainSize = nullptr, bool NoGroup = false,
+ int Sched = 0, Value *Final = nullptr, bool Mergeable = false,
+ Value *Priority = nullptr, TaskDupCallbackTy DupCB = nullptr,
Value *TaskContextStructPtrVal = nullptr);
/// Generator for `#omp task`
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index a71bcb0bb5392..580daed4f7d10 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2096,8 +2096,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
const LocationDescription &Loc, InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB,
llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
- Value *LBVal, Value *UBVal, Value *StepVal, bool Tied,
- TaskDupCallbackTy DupCB, Value *TaskContextStructPtrVal) {
+ Value *LBVal, Value *UBVal, Value *StepVal, bool Untied, Value *IfCond,
+ Value *GrainSize, bool NoGroup, int Sched, Value *Final, bool Mergeable,
+ Value *Priority, TaskDupCallbackTy DupCB, Value *TaskContextStructPtrVal) {
if (!updateToLocation(Loc))
return InsertPointTy();
@@ -2172,9 +2173,11 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
}
Value *TaskDupFn = *TaskDupFnOrErr;
- OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Tied,
+ OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Untied,
TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
- FakeLB, FakeUB, FakeStep](Function &OutlinedFn) mutable {
+ IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
+ FakeStep, Final, Mergeable,
+ Priority](Function &OutlinedFn) mutable {
// Replace the Stale CI by appropriate RTL function call.
assert(OutlinedFn.hasOneUse() &&
"there must be a single user for the outlined function");
@@ -2207,8 +2210,22 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
- // The flags are set to 1 if the task is tied, 0 otherwise.
- Value *Flags = Builder.getInt32(Tied);
+ // `flags` Argument Configuration
+ // Task is tied if (Flags & 1) == 1.
+ // Task is untied if (Flags & 1) == 0.
+ // Task is final if (Flags & 2) == 2.
+ // Task is not final if (Flags & 2) == 0.
+ // Task is mergeable if (Flags & 4) == 4.
+ // Task is not mergeable if (Flags & 4) == 0.
+ // Task is priority if (Flags & 32) == 32.
+ // Task is not priority if (Flags & 32) == 0.
+ Value *Flags = Builder.getInt32(Untied ? 0 : 1);
+ if (Final)
+ Flags = Builder.CreateOr(Builder.getInt32(2), Flags);
+ if (Mergeable)
+ Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
+ if (Priority)
+ Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
Value *TaskSize = Builder.getInt64(
divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
@@ -2251,25 +2268,32 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
llvm::Value *Loadstep = Builder.CreateLoad(Builder.getInt64Ty(), Step);
// set up the arguments for emitting kmpc_taskloop runtime call
- // setting default values for ifval, nogroup, sched, grainsize, task_dup
- Value *IfVal = Builder.getInt32(1);
- Value *NoGroup = Builder.getInt32(1);
- Value *Sched = Builder.getInt32(0);
- Value *GrainSize = Builder.getInt64(0);
+ // setting values for ifval, nogroup, sched, grainsize, task_dup
+ Value *IfCondVal =
+ IfCond ? Builder.CreateIntCast(IfCond, Builder.getInt32Ty(), true)
+ : Builder.getInt32(1);
+ Value *NoGroupVal = Builder.getInt32(NoGroup ? 1 : 0);
+ Value *SchedVal = Builder.getInt32(Sched);
+ Value *GrainSizeVal =
+ GrainSize ? Builder.CreateIntCast(GrainSize, Builder.getInt64Ty(), true)
+ : Builder.getInt64(0);
Value *TaskDup = TaskDupFn;
- Value *Args[] = {Ident, ThreadID, TaskData, IfVal, Lb, Ub,
- Loadstep, NoGroup, Sched, GrainSize, TaskDup};
+ Value *Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
+ Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
// taskloop runtime call
Function *TaskloopFn =
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskloop);
Builder.CreateCall(TaskloopFn, Args);
- // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup
- Function *EndTaskgroupFn =
- getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
- Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
+ // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup if
+ // nogroup is not defined
+ if (!NoGroup) {
+ Function *EndTaskgroupFn =
+ getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
+ Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
+ }
StaleCI->eraseFromParent();
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6f4e9e3b2330a..eef71a17fb41d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -361,22 +361,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
if (op.getFinal())
result = todo("final");
};
- auto checkGrainsize = [&todo](auto op, LogicalResult &result) {
- if (op.getGrainsize())
- result = todo("grainsize");
- };
- auto checkIf = [&todo](auto op, LogicalResult &result) {
- if (op.getIfExpr())
- result = todo("if");
- };
- auto checkMergeable = [&todo](auto op, LogicalResult &result) {
- if (op.getMergeable())
- result = todo("mergeable");
- };
- auto checkNogroup = [&todo](auto op, LogicalResult &result) {
- if (op.getNogroup())
- result = todo("nogroup");
- };
auto checkHint = [](auto op, LogicalResult &) {
if (op.getHint())
op.emitWarning("hint clause discarded");
@@ -390,10 +374,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
if (op.getNowait())
result = todo("nowait");
};
- auto checkNumTasks = [&todo](auto op, LogicalResult &result) {
- if (op.getNumTasks())
- result = todo("num_tasks");
- };
auto checkOrder = [&todo](auto op, LogicalResult &result) {
if (op.getOrder() || op.getOrderMod())
result = todo("order");
@@ -424,10 +404,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
op.getTaskReductionSyms())
result = todo("task_reduction");
};
- auto checkUntied = [&todo](auto op, LogicalResult &result) {
- if (op.getUntied())
- result = todo("untied");
- };
LogicalResult result = success();
llvm::TypeSwitch<Operation &>(op)
@@ -471,16 +447,8 @@ static LogicalResult checkImplementationStatus(Operation &op) {
})
.Case([&](omp::TaskloopOp op) {
checkAllocate(op, result);
- checkFinal(op, result);
- checkGrainsize(op, result);
- checkIf(op, result);
checkInReduction(op, result);
- checkMergeable(op, result);
- checkNogroup(op, result);
- checkNumTasks(op, result);
checkReduction(op, result);
- checkUntied(op, result);
- checkPriority(op, result);
})
.Case([&](omp::WsloopOp op) {
checkAllocate(op, result);
@@ -2767,6 +2735,21 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
return loopInfo;
};
+ llvm::Value *ifCond = nullptr;
+ llvm::Value *grainsize = nullptr;
+ int sched = 0; // default
+ Value grainsizeVal = taskloopOp.getGrainsize();
+ Value numTasksVal = taskloopOp.getNumTasks();
+ if (Value ifVar = taskloopOp.getIfExpr())
+ ifCond = moduleTranslation.lookupValue(ifVar);
+ if (grainsizeVal) {
+ grainsize = moduleTranslation.lookupValue(grainsizeVal);
+ sched = 1; // grainsize
+ } else if (numTasksVal) {
+ grainsize = moduleTranslation.lookupValue(numTasksVal);
+ sched = 2; // num_tasks
+ }
+
llvm::OpenMPIRBuilder::TaskDupCallbackTy taskDupOrNull = nullptr;
if (!taskStructMgr.getLLVMPrivateVarGEPs().empty())
taskDupOrNull = taskDupCB;
@@ -2778,7 +2761,11 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[0]),
moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[0]),
moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]),
- /*Tied=*/true, taskDupOrNull, taskStructMgr.getStructPtr());
+ taskloopOp.getUntied(), ifCond, grainsize, taskloopOp.getNogroup(),
+ sched, moduleTranslation.lookupValue(taskloopOp.getFinal()),
+ taskloopOp.getMergeable(),
+ moduleTranslation.lookupValue(taskloopOp.getPriority()),
+ taskDupOrNull, taskStructMgr.getStructPtr());
if (failed(handleError(afterIP, opInst)))
return failure();
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
new file mode 100644
index 0000000000000..c1202d1ffab7b
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
@@ -0,0 +1,88 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.load %arg0 : !llvm.ptr -> i32
+ llvm.store %0, %arg1 : i32, !llvm.ptr
+ omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+ %6 = llvm.mlir.constant(20 : i32) : i32
+ llvm.store %6, %3 : i32, !llvm.ptr
+ %7 = llvm.mlir.constant(1 : i32) : i32
+ %8 = llvm.mlir.constant(5 : i32) : i32
+ %9 = llvm.mlir.constant(1 : i32) : i32
+ %true = llvm.mlir.constant(true) : i1
+ omp.taskloop final(%true) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+ omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+ llvm.store %arg2, %arg1 : i32, !llvm.ptr
+ %10 = llvm.load %arg0 : !llvm.ptr -> i32
+ %11 = llvm.mlir.constant(1 : i32) : i32
+ %12 = llvm.add %10, %11 : i32
+ llvm.store %12, %arg0 : i32, !llvm.ptr
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// CHECK: define void @_QPtest() {
+// CHECK: %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK: %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK: %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK: store i32 20, ptr %2, align 4
+// CHECK: br label %[[entry:.*]]
+
+// CHECK: entry: ; preds = %0
+// CHECK: br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init: ; preds = %[[entry:.*]]
+// CHECK: %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK: %[[VAL_3:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK: br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy: ; preds = %[[omp_private_init]]
+// CHECK: br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1: ; preds = %[[omp_private_copy]]
+// CHECK: %[[VAL_4:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK: store i32 %[[VAL_4]], ptr %[[VAL_3]], align 4
+// CHECK: br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start: ; preds = %[[omp_private_copy1]]
+// CHECK: br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl: ; preds = %[[omp_taskloop_start]]
+// CHECK: %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK: store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK: %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK: store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK: %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK: store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK: %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK: store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK: %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK: %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 3, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK: %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_6]], ptr align 1 %[[structArg]], i64 32, i1 false)
+// CHECK: %[[VAL_7:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 0
+// CHECK: %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
+// CHECK: %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
+// CHECK: %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
+// CHECK: call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK: call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
+// CHECK: br label %taskloop.exit
+
+// CHECK: taskloop.exit: ; preds = %[[codeRepl]]
+// CHECK: ret void
+// CHECK: }
+
+// -----
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
new file mode 100644
index 0000000000000..a274e82adba76
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
@@ -0,0 +1,88 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.load %arg0 : !llvm.ptr -> i32
+ llvm.store %0, %arg1 : i32, !llvm.ptr
+ omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+ %6 = llvm.mlir.constant(20 : i32) : i32
+ llvm.store %6, %3 : i32, !llvm.ptr
+ %7 = llvm.mlir.constant(1 : i32) : i32
+ %8 = llvm.mlir.constant(5 : i32) : i32
+ %9 = llvm.mlir.constant(1 : i32) : i32
+ %c2_i32 = llvm.mlir.constant(2 : i32) : i32
+ omp.taskloop grainsize(%c2_i32: i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+ omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+ llvm.store %arg2, %arg1 : i32, !llvm.ptr
+ %10 = llvm.load %arg0 : !llvm.ptr -> i32
+ %11 = llvm.mlir.constant(1 : i32) : i32
+ %12 = llvm.add %10, %11 : i32
+ llvm.store %12, %arg0 : i32, !llvm.ptr
+ omp.yield
+ }
+ }
+ llvm.return
+}
+
+// CHECK: define void @_QPtest() {
+// CHECK: %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK: %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK: %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK: store i32 20, ptr %2, align 4
+// CHECK: br label %[[entry:.*]]
+
+// CHECK: entry: ; preds = %0
+// CHECK: br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init: ; preds = %[[entry:.*]]
+// CHECK: %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK: %[[VAL_3:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK: br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy: ; preds = %[[omp_private_init]]
+// CHECK: br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1: ; preds = %[[omp_private_copy]]
+// CHECK: %[[VAL_4:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK: store i32 %[[VAL_4]], ptr %[[VAL_3]], align 4
+// CHECK: br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start: ; preds = %[[omp_private_copy1]]
+// CHECK: br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl: ; preds = %[[omp_taskloop_start]]
+// CHECK: %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK: store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK: %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK: store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK: %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK: store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK: %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK: store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK: %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK: %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK: %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/174623
More information about the llvm-branch-commits
mailing list