[clang] [llvm] [openmp] [OpenMP][offload] Fix dynamic schedule tracking (PR #97065)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 28 07:58:17 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-openmp
@llvm/pr-subscribers-clang
Author: Gheorghe-Teodor Bercea (doru1004)
<details>
<summary>Changes</summary>
This patch fixes the dynamic schedule tracking.
---
Patch is 788.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97065.diff
24 Files Affected:
- (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+14)
- (modified) clang/lib/CodeGen/CGOpenMPRuntime.h (+16)
- (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (+5)
- (modified) clang/test/OpenMP/distribute_parallel_for_codegen.cpp (+36)
- (modified) clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp (+96-60)
- (modified) clang/test/OpenMP/nvptx_SPMD_codegen.cpp (+2034-1923)
- (modified) clang/test/OpenMP/ordered_codegen.cpp (+64-34)
- (modified) clang/test/OpenMP/parallel_for_codegen.cpp (+30-6)
- (modified) clang/test/OpenMP/target_parallel_for_codegen.cpp (+4)
- (modified) clang/test/OpenMP/target_parallel_for_simd_codegen.cpp (+4)
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp (+40)
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp (+80-40)
- (modified) clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp (+40)
- (modified) clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp (+80-40)
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+3)
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+3)
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+4)
- (modified) offload/DeviceRTL/include/Interface.h (+4)
- (modified) offload/DeviceRTL/src/Kernel.cpp (+1)
- (modified) offload/DeviceRTL/src/Workshare.cpp (+62-14)
- (added) offload/test/offloading/dynamic-schedule.cpp (+55)
- (added) offload/test/offloading/schedule.c (+84)
- (modified) openmp/runtime/src/kmp.h (+2)
- (modified) openmp/runtime/src/kmp_dispatch.cpp (+5)
``````````diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f6d12d46cfc07..b47b521edd32c 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2553,6 +2553,15 @@ void CGOpenMPRuntime::emitForDispatchInit(
Args);
}
+void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
+ SourceLocation Loc) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+ CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
+}
+
static void emitForStaticInitCall(
CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
@@ -11996,6 +12005,11 @@ void CGOpenMPSIMDRuntime::emitForDispatchInit(
llvm_unreachable("Not supported in SIMD-only mode");
}
+void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
+ SourceLocation Loc) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
void CGOpenMPSIMDRuntime::emitForStaticInit(
CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 522ae3d35d22d..f65314d014c08 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -946,6 +946,14 @@ class CGOpenMPRuntime {
unsigned IVSize, bool IVSigned, bool Ordered,
const DispatchRTInput &DispatchValues);
+ /// This is used for non static scheduled types and when the ordered
+ /// clause is present on the loop construct.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ ///
+ virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc);
+
/// Struct with the values to be passed to the static runtime function
struct StaticRTInput {
/// Size of the iteration variable in bits.
@@ -1829,6 +1837,14 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
unsigned IVSize, bool IVSigned, bool Ordered,
const DispatchRTInput &DispatchValues) override;
+ /// This is used for non static scheduled types and when the ordered
+ /// clause is present on the loop construct.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ ///
+ void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override;
+
/// Call the appropriate runtime routine to initialize it before start
/// of loop.
///
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f73d32de7c484..3c88f4eb71572 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2985,12 +2985,14 @@ void CodeGenFunction::EmitOMPForOuterLoop(
// run-sched-var ICV. If the ICV is set to auto, the schedule is
// implementation defined
//
+ // __kmpc_dispatch_init();
// while(__kmpc_dispatch_next(&LB, &UB)) {
// idx = LB;
// while (idx <= UB) { BODY; ++idx;
// __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
// } // inner loop
// }
+ // __kmpc_dispatch_deinit();
//
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
// When schedule(static, chunk_size) is specified, iterations are divided into
@@ -3044,6 +3046,9 @@ void CodeGenFunction::EmitOMPForOuterLoop(
OuterLoopArgs.DKind = LoopArgs.DKind;
EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
+ if (DynamicOrOrdered) {
+ RT.emitForDispatchDeinit(*this, S.getBeginLoc());
+ }
}
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
index a8892a06d4b30..109f1690e448b 100644
--- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -2283,6 +2283,9 @@ int main() {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -2533,6 +2536,9 @@ int main() {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
+// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
+// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -4010,6 +4016,9 @@ int main() {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
+// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -4253,6 +4262,9 @@ int main() {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
+// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
+// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -6314,6 +6326,9 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
+// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
@@ -6554,6 +6569,9 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
+// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
@@ -8627,6 +8645,9 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
+// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
@@ -8867,6 +8888,9 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
+// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
// CHECK9: omp.precond.end:
// CHECK9-NEXT: ret void
@@ -10884,6 +10908,9 @@ int main() {
// CHECK11: omp.dispatch.inc:
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK11: omp.dispatch.end:
+// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
@@ -11117,6 +11144,9 @@ int main() {
// CHECK11: omp.dispatch.inc:
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK11: omp.dispatch.end:
+// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
@@ -13146,6 +13176,9 @@ int main() {
// CHECK11: omp.dispatch.inc:
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK11: omp.dispatch.end:
+// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
@@ -13379,6 +13412,9 @@ int main() {
// CHECK11: omp.dispatch.inc:
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK11: omp.dispatch.end:
+// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
// CHECK11: omp.precond.end:
// CHECK11-NEXT: ret void
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
index a23d538910edd..14dcb17358061 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -2415,12 +2415,15 @@ int main() {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
-// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]])
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
+// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP38]], 0
// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
@@ -2689,12 +2692,15 @@ int main() {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
-// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
+// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]])
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP39]], 0
// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1
// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]]
@@ -4310,12 +4316,15 @@ int main() {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
-// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
-// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
+// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]])
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
+// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK3: .omp.final.then:
-// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP38]], 0
// CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1
// CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1
// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
@@ -4577,12 +4586,15 @@ int main() {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
-// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
-// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
+// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]])
+// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK3: .omp.final.then:
-// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0
+// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP39]], 0
// CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
@@ -6836,12 +6848,15 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
-// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
-// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
+// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
-// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0
+// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP34]], 0
// CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
@@ -7100,12 +7115,15 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
-// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
+// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
+// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
-// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0
+// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP35]], 0
// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1
// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]]
@@ -9302,12 +9320,15 @@ int main() {
// CHECK9: omp.dispatch.inc:
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK9: omp.dispatch.end:
-// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
-// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
+// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
+// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
-// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0
+// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP34]], 0
// CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]],...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/97065
More information about the llvm-commits
mailing list