[llvm-branch-commits] [OpenMP] Extend taskgraph shared-data relocation to taskloop (PR #200405)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri May 29 06:55:48 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-flang-openmp
Author: Julian Brown (jtb20)
<details>
<summary>Changes</summary>
Build on the per-task relocation infrastructure introduced for
'omp task' in the previous patch and apply the same scheme to
taskloop tasks recorded inside a taskgraph. Without this,
by-reference captures inside a recorded taskloop iteration still
point at the original recording's stack on each replay.
On the compiler side (CGOpenMPRuntime.cpp), emit a relocation
thunk for the taskloop's captured statement via the shared
emitTaskRelocationFunction helper and pass it as the new trailing
argument of __kmpc_taskgraph_taskloop. The now-unused 'shareds'
and 'sizeof_shareds' parameters are dropped from the call and from
TGTaskLoopArgs.
On the runtime side, update __kmpc_taskgraph_taskloop to match
the new ABI (drop the dead 'shareds' / 'sizeof_shareds'
parameters, add the trailing kmp_task_relocate_t parameter) and
plumb the callback through the taskgraph variant of
__kmp_taskloop and __kmp_taskloop_linear so every recorded
subtask gets node->relocate = reloc, mirroring the explicit-task
path. Non-taskgraph callers pass a default nullptr and are
unaffected.
Add taskloop counterparts of the omp-task runtime tests added in
the previous patch.
Assisted-By: Claude Opus 4.7
---
Patch is 31.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/200405.diff
14 Files Affected:
- (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+17-13)
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+2-2)
- (modified) openmp/runtime/src/kmp.h (+1-2)
- (modified) openmp/runtime/src/kmp_tasking.cpp (+13-10)
- (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp (+43)
- (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp (+85)
- (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp (+57)
- (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp (+83)
- (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp (+42)
- (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_recursive_taskloop.cpp (+70)
- (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_works_taskloop.cpp (+41)
- (added) openmp/runtime/test/taskgraph/taskgraph_replayable_nonlexical_shared_fails_1_taskloop.cpp (+46)
- (added) openmp/runtime/test/taskgraph/taskgraph_replayable_nonlexical_shared_fails_2_taskloop.cpp (+59)
- (added) openmp/runtime/test/taskgraph/taskgraph_replayable_saved_stack_depth_taskloop.cpp (+126)
``````````diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 9f342038f2285..ee8583a9f5519 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -5152,7 +5152,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(
PrePostActionTy &) {
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
- std::array<llvm::Value *, 16> TGTaskLoopArgs;
+ std::array<llvm::Value *, 14> TGTaskLoopArgs;
// This is all copy/pasted from below. Refactor!
LValue LBLVal = CGF.EmitLValueForField(
@@ -5197,30 +5197,34 @@ void CGOpenMPRuntime::emitTaskLoopCall(
TGTaskLoopArgs[1] = ThreadId;
TGTaskLoopArgs[2] = TaskInitResult.NewTask;
TGTaskLoopArgs[3] = TaskAllocArgs[0]; // TaskFlags
- TGTaskLoopArgs[4] = TaskAllocArgs[1]; // KmpTaskTWithPrivatesTySize
- TGTaskLoopArgs[5] = Shareds.emitRawPointer(CGF);
- TGTaskLoopArgs[6] = TaskAllocArgs[2]; // SharedsSize
- TGTaskLoopArgs[7] = IfVal;
- TGTaskLoopArgs[8] = LBLVal.getPointer(CGF);
- TGTaskLoopArgs[9] = UBLVal.getPointer(CGF);
- TGTaskLoopArgs[10] = CGF.EmitLoadOfScalar(StLVal, Loc);
- TGTaskLoopArgs[11] =
+ TGTaskLoopArgs[4] = IfVal;
+ TGTaskLoopArgs[5] = LBLVal.getPointer(CGF);
+ TGTaskLoopArgs[6] = UBLVal.getPointer(CGF);
+ TGTaskLoopArgs[7] = CGF.EmitLoadOfScalar(StLVal, Loc);
+ TGTaskLoopArgs[8] =
llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0);
- TGTaskLoopArgs[12] = llvm::ConstantInt::getSigned(
+ TGTaskLoopArgs[9] = llvm::ConstantInt::getSigned(
CGF.IntTy, Data.Schedule.getPointer()
? Data.Schedule.getInt() ? NumTasks : Grainsize
: NoSchedule);
- TGTaskLoopArgs[13] =
+ TGTaskLoopArgs[10] =
Data.Schedule.getPointer()
? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
/*isSigned=*/false)
: llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0);
- TGTaskLoopArgs[14] =
+ TGTaskLoopArgs[11] =
llvm::ConstantInt::getSigned(CGF.IntTy, Data.HasModifier ? 1 : 0);
- TGTaskLoopArgs[15] = TaskInitResult.TaskDupFn
+ TGTaskLoopArgs[12] = TaskInitResult.TaskDupFn
? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
TaskInitResult.TaskDupFn, CGF.VoidPtrTy)
: llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ const auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+ llvm::Function *RelocFn =
+ emitTaskRelocationFunction(CGM, Loc, *CS, CGF.CapturedStmtInfo, Data);
+ TGTaskLoopArgs[13] = RelocFn
+ ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ RelocFn, CGM.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_taskgraph_taskloop),
TGTaskLoopArgs);
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index e32308df74cae..02e3e1f98e969 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -362,8 +362,8 @@ __OMP_RTL(__kmpc_taskgraph, false, Void, IdentPtr, Int32, VoidPtrPtr, SizeTy,
__OMP_RTL(__kmpc_taskgraph_task, false, Int32, IdentPtr, Int32, VoidPtr, Int32,
SizeTy, SizeTy, Int32, VoidPtr, VoidPtr)
__OMP_RTL(__kmpc_taskgraph_taskloop, false, Int32, IdentPtr, Int32, VoidPtr,
- Int32, SizeTy, VoidPtr, SizeTy, Int32, Int64Ptr, Int64Ptr, Int64,
- Int32, Int32, Int64, Int32, VoidPtr)
+ Int32, Int32, Int64Ptr, Int64Ptr, Int64,
+ Int32, Int32, Int64, Int32, VoidPtr, VoidPtr)
__OMP_RTL(__kmpc_taskgraph_taskwait, false, Void, IdentPtr, Int32, Int32,
VoidPtr, Int32)
__OMP_RTL(__kmpc_taskgraph_taskred_init, false, /* kmp_taskgroup */ VoidPtr,
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index befca12786e70..9ebb7e6f654bc 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -4514,10 +4514,9 @@ KMP_EXPORT kmp_uint32 __kmpc_taskgraph_task(
kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_task_relocate_t reloc);
KMP_EXPORT kmp_uint32 __kmpc_taskgraph_taskloop(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 flags,
- size_t sizeof_kmp_task_t, void *shareds, size_t sizeof_shareds,
kmp_int32 if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
kmp_int32 nogroup, kmp_int32 sched, kmp_uint64 grainsize,
- kmp_int32 modifier, void *task_dup);
+ kmp_int32 modifier, void *task_dup, kmp_task_relocate_t reloc);
KMP_EXPORT void __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid,
kmp_int32 ndeps,
kmp_depend_info_t *dep_list,
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index d595c555a72c0..7b3f4b04fbd16 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -5074,7 +5074,8 @@ __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, kmp_uint64 *lb,
void *task_dup
#if OMP_TASKGRAPH_EXPERIMENTAL
,
- kmp_taskgraph_record_t *taskgraph_rec = nullptr
+ kmp_taskgraph_record_t *taskgraph_rec = nullptr,
+ kmp_task_relocate_t relocate = nullptr
#endif
) {
KMP_COUNT_BLOCK(OMP_TASKLOOP);
@@ -5170,6 +5171,7 @@ __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, kmp_uint64 *lb,
taskgroup->taskgraph.reduce_input = nullptr;
}
node->taskloop_task = true;
+ node->relocate = relocate;
next_taskdata->owning_taskgraph = taskgraph_rec;
// FIXME: These dependency fields might be back-filled by the as-yet
// unimplemented task_iteration subsidiary directive. We'll need a way
@@ -5462,7 +5464,8 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
int modifier, void *task_dup
#if OMP_TASKGRAPH_EXPERIMENTAL
,
- kmp_taskgraph_record_t *taskgraph_rec = nullptr
+ kmp_taskgraph_record_t *taskgraph_rec = nullptr,
+ kmp_task_relocate_t relocate = nullptr
#endif
) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
@@ -5596,7 +5599,7 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
#if OMPT_SUPPORT
OMPT_GET_RETURN_ADDRESS(0),
#endif
- task_dup, taskgraph_rec);
+ task_dup, taskgraph_rec, relocate);
// check if clause value next
// Also require GOMP_taskloop to reduce to linear
// (taskdata->td_flags.native)
@@ -6121,12 +6124,12 @@ void __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid,
kmp_uint32 __kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *new_task, kmp_int32 flags,
- size_t sizeof_kmp_task_t, void *shareds,
- size_t sizeof_shareds, kmp_int32 if_val,
- kmp_uint64 *lb, kmp_uint64 *ub,
- kmp_int64 st, kmp_int32 nogroup,
- kmp_int32 sched, kmp_uint64 grainsize,
- kmp_int32 modifier, void *task_dup) {
+ kmp_int32 if_val, kmp_uint64 *lb,
+ kmp_uint64 *ub, kmp_int64 st,
+ kmp_int32 nogroup, kmp_int32 sched,
+ kmp_uint64 grainsize, kmp_int32 modifier,
+ void *task_dup,
+ kmp_task_relocate_t relocate) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskgroup_t *taskgroup = thread->th.th_current_task->td_taskgroup;
kmp_taskgraph_record_t *rec = __kmp_taskgraph_or_parent_recording(taskgroup);
@@ -6135,7 +6138,7 @@ kmp_uint32 __kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid,
kmp_taskgraph_status_t status = KMP_ATOMIC_LD_ACQ(&rec->status);
if (status == KMP_TDG_RECORDING)
__kmp_taskloop(loc_ref, gtid, new_task, if_val, lb, ub, st, nogroup,
- sched, grainsize, modifier, task_dup, rec);
+ sched, grainsize, modifier, task_dup, rec, relocate);
else if (status == KMP_TDG_READY) {
#ifdef DEBUG_TASKGRAPH
fprintf(stderr, "non-taskgraph taskloop entry point for taskloop in "
diff --git a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp
new file mode 100644
index 0000000000000..a6b473069d50c
--- /dev/null
+++ b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp
@@ -0,0 +1,43 @@
+// clang-format off
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s
+// REQUIRES: omp_taskgraph_experimental
+// clang-format on
+
+#include <cstdio>
+
+__attribute__((noinline)) static int run_taskgraph_mixed_capture(int seed) {
+ int x = seed;
+ int y = seed * 2;
+ int fp = 7;
+ int res = 0;
+
+#pragma omp taskgraph graph_id(612)
+ {
+#pragma omp taskloop replayable num_tasks(8) shared(x, y) firstprivate(fp) reduction(+ : res)
+ for (int i = 0; i < 16; ++i) {
+ res += x + y + fp + i;
+ }
+ }
+
+ return res;
+}
+
+int main() {
+ const int first = run_taskgraph_mixed_capture(1);
+ const int second = run_taskgraph_mixed_capture(100);
+
+ if (first != 280 || second != 5032) {
+ std::fprintf(stderr,
+ "FAIL lexical mixed capture taskloop replay first=%d second=%d expected=280/5032\n",
+ first, second);
+ return 1;
+ }
+
+ std::fprintf(stderr,
+ "PASS lexical mixed capture taskloop replay first=%d second=%d\n",
+ first, second);
+ return 0;
+}
+
+// CHECK: PASS lexical mixed capture taskloop replay first=280 second=5032
diff --git a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp
new file mode 100644
index 0000000000000..5adb743c5a007
--- /dev/null
+++ b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp
@@ -0,0 +1,85 @@
+// clang-format off
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s
+// REQUIRES: omp_taskgraph_experimental
+// clang-format on
+
+#include <cstdio>
+
+struct Tracker {
+ static int Ctors;
+ static int Dtors;
+
+ int Value;
+
+ explicit Tracker(int V) : Value(V) { ++Ctors; }
+ ~Tracker() { ++Dtors; }
+
+ void bump(int Delta) { Value += Delta; }
+};
+
+int Tracker::Ctors = 0;
+int Tracker::Dtors = 0;
+
+__attribute__((noinline)) static int expected_recursive(int depth, int seed,
+ int run_tag) {
+ int local = 16 * (seed + (depth + 1) * 5 + run_tag) + 120;
+ if (depth == 0)
+ return local;
+ return local + expected_recursive(depth - 1, seed + 9, run_tag);
+}
+
+__attribute__((noinline)) static int run_recursive_nontrivial(int depth, int seed,
+ int run_tag) {
+ Tracker Obj(seed);
+ int res = 0;
+
+ int gid = 620 + depth;
+#pragma omp taskgraph graph_id(gid)
+ {
+#pragma omp taskloop replayable num_tasks(8) shared(Obj, depth, run_tag) reduction(+ : res)
+ for (int i = 0; i < 16; ++i) {
+ res += Obj.Value + (depth + 1) * 5 + run_tag + i;
+ }
+ }
+
+ if (depth == 0)
+ return res;
+ return res + run_recursive_nontrivial(depth - 1, seed + 9, run_tag);
+}
+
+int main() {
+ const int depth = 3;
+ int total_actual = 0;
+ int total_expected = 0;
+
+ for (int run = 0; run < 3; ++run) {
+ const int seed = 100 * run + 1;
+ const int actual = run_recursive_nontrivial(depth, seed, run);
+ const int expected = expected_recursive(depth, seed, run);
+
+ if (actual != expected) {
+ std::fprintf(stderr,
+ "FAIL recursive nontrivial taskloop run=%d actual=%d expected=%d\n",
+ run, actual, expected);
+ return 1;
+ }
+
+ total_actual += actual;
+ total_expected += expected;
+ }
+
+ if (Tracker::Ctors != Tracker::Dtors || Tracker::Ctors < 12) {
+ std::fprintf(stderr,
+ "FAIL recursive nontrivial taskloop lifetime ctors=%d dtors=%d total=%d expected=%d\n",
+ Tracker::Ctors, Tracker::Dtors, total_actual, total_expected);
+ return 1;
+ }
+
+ std::fprintf(stderr,
+ "PASS recursive nontrivial taskloop total=%d expected=%d ctors=%d dtors=%d\n",
+ total_actual, total_expected, Tracker::Ctors, Tracker::Dtors);
+ return 0;
+}
+
+// CHECK: PASS recursive nontrivial taskloop total=
diff --git a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp
new file mode 100644
index 0000000000000..df846e80e0b9f
--- /dev/null
+++ b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp
@@ -0,0 +1,57 @@
+// clang-format off
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s
+// REQUIRES: omp_taskgraph_experimental
+// clang-format on
+
+#include <cstdio>
+
+struct Tracker {
+ static int Ctors;
+ static int Dtors;
+
+ int Value;
+
+ explicit Tracker(int V) : Value(V) { ++Ctors; }
+ ~Tracker() { ++Dtors; }
+
+ void bump(int Delta) { Value += Delta; }
+};
+
+int Tracker::Ctors = 0;
+int Tracker::Dtors = 0;
+
+__attribute__((noinline)) static int run_taskgraph_nontrivial(int seed) {
+ Tracker Obj(seed);
+ int res = 0;
+
+#pragma omp taskgraph graph_id(614)
+ {
+#pragma omp taskloop replayable num_tasks(8) shared(Obj) reduction(+ : res)
+ for (int i = 0; i < 16; ++i) {
+ res += Obj.Value + i;
+ }
+ }
+
+ return res;
+}
+
+int main() {
+ const int first = run_taskgraph_nontrivial(1);
+ const int second = run_taskgraph_nontrivial(100);
+
+ if (first != 136 || second != 1720 || Tracker::Ctors < 2 ||
+ Tracker::Dtors < 2 || Tracker::Ctors != Tracker::Dtors) {
+ std::fprintf(stderr,
+ "FAIL lexical nontrivial taskloop replay first=%d second=%d ctors=%d dtors=%d\n",
+ first, second, Tracker::Ctors, Tracker::Dtors);
+ return 1;
+ }
+
+ std::fprintf(stderr,
+ "PASS lexical nontrivial taskloop replay first=%d second=%d ctors=%d dtors=%d\n",
+ first, second, Tracker::Ctors, Tracker::Dtors);
+ return 0;
+}
+
+// CHECK: PASS lexical nontrivial taskloop replay first=136 second=1720
diff --git a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp
new file mode 100644
index 0000000000000..5c5aada2222e8
--- /dev/null
+++ b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp
@@ -0,0 +1,83 @@
+// clang-format off
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s
+// REQUIRES: omp_taskgraph_experimental
+// XFAIL: *
+// clang-format on
+
+#include <cstdio>
+#include <cstdint>
+
+__attribute__((noinline)) static int expected_recursive(int depth, int seed,
+ int run_tag) {
+ int value = 16 * (seed + (depth + 1) * 3 + run_tag) + 120;
+ if (depth == 0)
+ return value;
+ return value + expected_recursive(depth - 1, seed + 7, run_tag);
+}
+
+__attribute__((noinline)) static int run_recursive_frameid(int depth, int seed,
+ int run_tag) {
+ int value = seed;
+ int *ptr = &value;
+ int *&ptr_ref = ptr;
+ int sum_delta = 0;
+ uintptr_t frame_gid = reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
+
+ // Typically, if captured pointers refer to locations on the stack, that
+ // would not be safe for taskgraph record/replay because in general we
+ // cannot rewrite such pointers to point to the current (live) stack frame.
+ //
+ // This is one possible way around that though: we keep a taskgraph record
+ // per stack-depth, each of which may refer to the local stack frame.
+ //
+ // I probably wouldn't recommend use of this technique in production code.
+#pragma omp taskgraph graph_id(frame_gid)
+ {
+#pragma omp taskloop replayable num_tasks(8) shared(ptr_ref, depth, run_tag) reduction(+ : sum_delta)
+ for (int i = 0; i < 16; ++i) {
+ int delta = (depth + 1) * 3 + run_tag + i;
+ __atomic_fetch_add(ptr_ref, delta, __ATOMIC_RELAXED);
+ sum_delta += delta;
+ }
+ }
+
+ int local = value * 17 + sum_delta;
+
+ if (depth == 0)
+ return local;
+ return local + run_recursive_frameid(depth - 1, seed + 7, run_tag);
+}
+
+int main() {
+ const int depth = 3;
+ int recorded_sum = 0;
+ int replayed_sum = 0;
+
+ for (int run = 0; run < 3; ++run) {
+ int seed = 100 * run + 1;
+ int val = run_recursive_frameid(depth, seed, run);
+ if (run == 0)
+ recorded_sum = val;
+ else
+ replayed_sum += val;
+ }
+
+ // With missing relocation for taskloop replay, recursive invocations that
+ // mutate through shared-block pointers are expected to diverge from the
+ // expected replay behavior.
+ const int expected_replayed = 2 * recorded_sum;
+ if (replayed_sum == expected_replayed) {
+ std::fprintf(stderr,
+ "UNEXPECTED SUCCESS recursive pointer taskloop replay recorded=%d replayed_total=%d expected_total=%d\n",
+ recorded_sum, replayed_sum, expected_replayed);
+ return 0;
+ }
+
+ std::fprintf(stderr,
+ "EXPECTED FAILURE recursive pointer taskloop replay recorded=%d replayed_total=%d expected_total=%d\n",
+ recorded_sum, replayed_sum, expected_replayed);
+ return 1;
+}
+
+// CHECK: EXPECTED FAILURE recursive pointer taskloop replay
diff --git a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp
new file mode 100644
index 0000000000000..8e8f98f4d86b6
--- /dev/null
+++ b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp
@@ -0,0 +1,42 @@
+// clang-format off
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s
+// REQUIRES: omp_taskgraph_experimental
+// clang-format on
+
+#include <cstdio>
+
+__attribute__((noinline)) static int run_taskgraph_pointer_shared(int seed) {
+ int value = seed;
+ int *ptr = &value;
+ int res = 0;
+
+#pragma omp taskgraph graph_id(613)
+ {
+#pragma omp taskloop replayable num_tasks(8) shared(ptr) reduction(+ : res)
+ for (int i = 0; i < 16; ++i) {
+ res += *ptr + i;
+ }
+ }
+
+ return res;
+}
+
+int main() {
+ const int first = run_taskgraph_pointer_shared(1);
+ const int second = run_taskgraph_pointer_shared(100);
+
+ if (first != 136 || second != 1720) {
+ std::fprintf(stderr,
+ "FAIL lexical pointer shared taskloop replay first=%d second=%d expected=136/1720\n",
+ first, second);
+ return 1;
+ }
+
+ std::fprintf(stderr,
+ "PASS lexical pointer shared taskloop replay first=%d second=%d\n",
+ first, second);
+...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/200405
More information about the llvm-branch-commits
mailing list