[clang] [flang] [llvm] [mlir] [OpenMP] Only generate call to __kmpc_global_thread_num when needed (PR #182669)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Feb 21 05:11:00 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
Author: Jan Leyonberg (jsjodin)
<details>
<summary>Changes</summary>
This patch is a small optimization to only generate a call to __kmpc_global_thread_num if the result is actually used.
---
Patch is 608.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/182669.diff
18 Files Affected:
- (modified) clang/test/CIR/CodeGenOpenMP/omp-llvmir.c (-1)
- (modified) clang/test/OpenMP/cancel_codegen.cpp (+185-182)
- (modified) clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c (+4-9)
- (modified) clang/test/OpenMP/irbuilder_nested_parallel_for.c (+1595-1595)
- (modified) clang/test/OpenMP/nested_loop_codegen.cpp (+402-402)
- (modified) clang/test/OpenMP/parallel_codegen.cpp (+249-249)
- (modified) clang/test/OpenMP/taskgroup_codegen.cpp (+17-16)
- (modified) flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 (+9-9)
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+3-1)
- (modified) llvm/test/Transforms/OpenMP/parallel_region_merging.ll (+84-79)
- (modified) mlir/test/Target/LLVMIR/openmp-dist_schedule_with_wsloop.mlir (+16-16)
- (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+1-2)
- (modified) mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir (-1)
- (modified) mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir (-1)
- (modified) mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir (-1)
- (modified) mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir (+7-7)
- (modified) mlir/test/Target/LLVMIR/openmp-reduction-byref.mlir (-1)
- (modified) mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir (-1)
``````````diff
diff --git a/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c b/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c
index d32753ae4475b..518152a4db01b 100644
--- a/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c
+++ b/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c
@@ -46,7 +46,6 @@
// LLVM: br label %[[ENTRY:.*]]
// LLVM: [[ENTRY]]:
-// LLVM: %[[THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
// LLVM: br label %[[OMP_PARALLEL:.*]]
// LLVM: [[OMP_PARALLEL]]:
diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp
index 600aae211087a..acd2b9ce34148 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -310,8 +310,8 @@ for (int i = 0; i < argc; ++i) {
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !nonnull [[META3]], !align [[META5:![0-9]+]]
// CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr @flag, align 4
// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
@@ -381,29 +381,29 @@ for (int i = 0; i < argc; ++i) {
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
-// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
-// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META14:![0-9]+]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META14]]
-// CHECK1-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META14]]
-// CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META14]]
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META14]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META14]]
+// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]])
+// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]])
+// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META17:![0-9]+]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META17]]
+// CHECK1-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META17]]
+// CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META17]]
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META17]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META17]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META17]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META17]]
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP9]], i32 4)
// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
// CHECK1: .cancel.exit.i:
-// CHECK1-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META14]]
+// CHECK1-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META17]]
// CHECK1-NEXT: br label [[DOTOMP_OUTLINED__EXIT:%.*]]
// CHECK1: .cancel.continue.i:
-// CHECK1-NEXT: store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META14]]
+// CHECK1-NEXT: store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META17]]
// CHECK1-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
// CHECK1: .omp_outlined..exit:
-// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META14]]
+// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias [[META17]]
// CHECK1-NEXT: ret i32 0
//
//
@@ -561,8 +561,8 @@ for (int i = 0; i < argc; ++i) {
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[R_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !nonnull [[META3]], !align [[META5]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !nonnull [[META3]], !align [[META5]]
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
@@ -700,42 +700,41 @@ for (int i = 0; i < argc; ++i) {
// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_LASTITER32:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_LOWERBOUND33:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_UPPERBOUND34:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_STRIDE35:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_38:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[I40:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
// CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK3-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK3-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
-// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]]
// CHECK3: omp_parallel:
// CHECK3-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[GEP_ARGC_ADDR]], align 8
// CHECK3-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[ARGV_ADDR]], ptr [[GEP_ARGV_ADDR]], align 8
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]])
-// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]])
+// CHECK3-NEXT: br label [[OMP_PAR_EXIT:%.*]]
// CHECK3: omp.par.exit:
// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER:%.*]]
// CHECK3: omp_section_loop.preheader:
// CHECK3-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4
// CHECK3-NEXT: store i32 0, ptr [[P_UPPERBOUND]], align 4
// CHECK3-NEXT: store i32 1, ptr [[P_STRIDE]], align 4
-// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0)
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0)
// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
@@ -755,8 +754,8 @@ for (int i = 0; i < argc; ++i) {
// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]]
// CHECK3-NEXT: ]
// CHECK3: omp_section_loop.body.case:
-// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3)
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 3)
// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
// CHECK3: omp_section_loop.body.case.split:
@@ -769,93 +768,95 @@ for (int i = 0; i < argc; ++i) {
// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER]]
// CHECK3: omp_section_loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
-// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]])
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]])
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM14]])
// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]]
// CHECK3: omp_section_loop.after:
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER16:%.*]]
-// CHECK3: omp_section_loop.preheader16:
-// CHECK3-NEXT: store i32 0, ptr [[P_LOWERBOUND29]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[P_UPPERBOUND30]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[P_STRIDE31]], align 4
-// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, ptr [[P_LASTITER28]], ptr [[P_LOWERBOUND29]], ptr [[P_UPPERBOUND30]], ptr [[P_STRIDE31]], i32 1, i32 0)
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[P_LOWERBOUND29]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[P_UPPERBOUND30]], align 4
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER15:%.*]]
+// CHECK3: omp_section_loop.preheader15:
+// CHECK3-NEXT: store i32 0, ptr [[P_LOWERBOUND33]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[P_UPPERBOUND34]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[P_STRIDE35]], align 4
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM36:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM36]], i32 34, ptr [[P_LASTITER32]], ptr [[P_LOWERBOUND33]], ptr [[P_UPPERBOUND34]], ptr [[P_STRIDE35]], i32 1, i32 0)
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[P_LOWERBOUND33]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[P_UPPERBOUND34]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]]
// CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER17:%.*]]
-// CHECK3: omp_section_loop.header17:
-// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER16]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ]
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND18:%.*]]
-// CHECK3: omp_section_loop.cond18:
-// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]]
-// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY19:%.*]], label [[OMP_SECTION_LOOP_EXIT21:%.*]]
-// CHECK3: omp_section_loop.body19:
-// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]]
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER16:%.*]]
+// CHECK3: omp_section_loop.header16:
+// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV22:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER15]] ], [ [[OMP_SECTION_LOOP_NEXT24:%.*]], [[OMP_SECTION_LOOP_INC19:%.*]] ]
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND17:%.*]]
+// CHECK3: omp_section_loop.cond17:
+// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP23:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV22]], [[TMP12]]
+// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP23]], label [[OMP_SECTION_LOOP_BODY18:%.*]], label [[OMP_SECTION_LOOP_EXIT20:%.*]]
+// CHECK3: omp_section_loop.body18:
+// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV22]], [[TMP9]]
// CHECK3-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1
// CHECK3-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0
-// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [
-// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]]
-// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE29:%.*]]
+// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY18_SECTIONS_AFTER:%.*]] [
+// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]]
+// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE28:%.*]]
// CHECK3-NEXT: ]
-// CHECK3: omp_section_loop.body.case26:
-// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3)
+// CHECK3: omp_section_loop.body.case25:
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3)
// CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0
-// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE26_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]]
-// CHECK3: omp_section_loop.body.case26.split:
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]]
-// CHECK3: omp_section_loop.body.case26.section.after:
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]]
-// CHECK3: omp_section_loop.body.case29:
-// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3)
+// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]]
+// CHECK3: omp_section_loop.body.case25.split:
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]]
+// CHECK3: omp_section_loop.body.case25.section.after:
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY18_SECTIONS_AFTER]]
+// CHECK3: omp_section_loop.body.case28:
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM30:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]], i32 3)
// CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0
-// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE29_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE29_CNCL:%.*]]
-// CHECK3: omp_section_loop.body.case29.split:
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER29:%.*]]
-// CHECK3: omp_section_loop.body.case29.section.after30:
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE29_SECTION_AFTER:%.*]]
-// CHECK3: omp_section_loop.body.case29.section.after:
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER:.*]]
-// CHECK3: omp_section_loop.body19.sections.after:
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC20:.*]]
-// CHECK3: omp_section_loop.inc20:
-// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER17]]
-// CHECK3: omp_section_loop.exit21:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]])
-// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]])
-// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER22:%.*]]
-// CHECK3: omp_section_loop.after22:
+// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE28_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE28_CNCL:%.*]]
+// CHECK3: omp_section_loop.body.case28.split:
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER29:%.*]]
+// CHECK3: omp_section_loop.body.case28.section.after29:
+// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE:%.*]]
+// CHECK3: omp_region.finalize:
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER:%.*]]
+// CHECK3: omp_section_loop.body.case28.section.after:
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY18_SECTIONS_AFTER]]
+// CHECK3: omp_section_loop.body18.sections.after:
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC19]]
+// CHECK3: omp_section_loop.inc19:
+// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT24]] = add nuw i32 [[OMP_SECTION_LOOP_IV22]], 1
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER16]]
+// CHECK3: omp_section_loop.exit20:
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM36]])
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM37]])
+// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER21:%.*]]
+// CHECK3: omp_section_loop.after21:
// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DO...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/182669
More information about the cfe-commits mailing list