[llvm] c84a544 - [OPENMP]Fix PR50129: omp cancel parallel not working as expected.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 4 08:28:38 PDT 2021


Author: Alexey Bataev
Date: 2021-06-04T08:24:55-07:00
New Revision: c84a5448b5ac3212303c553866e8d42a5a75ce30

URL: https://github.com/llvm/llvm-project/commit/c84a5448b5ac3212303c553866e8d42a5a75ce30
DIFF: https://github.com/llvm/llvm-project/commit/c84a5448b5ac3212303c553866e8d42a5a75ce30.diff

LOG: [OPENMP]Fix PR50129: omp cancel parallel not working as expected.

When cancellation of a parallel region is requested, a call to
__kmpc_cancel_barrier must be emitted in the exit block that follows the
__kmpc_cancel (or __kmpc_cancellationpoint) runtime call.

Differential Revision: https://reviews.llvm.org/D103646

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/test/OpenMP/cancel_codegen.cpp
    clang/test/OpenMP/cancellation_point_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_codegen.cpp
    clang/test/OpenMP/parallel_master_taskloop_codegen.cpp
    clang/test/OpenMP/target_parallel_codegen.cpp
    clang/test/OpenMP/target_parallel_for_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index fc315b24687c3..068fd99b938f7 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6307,6 +6307,7 @@ void CGOpenMPRuntime::emitCancellationPointCall(
               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
           Args);
       // if (__kmpc_cancellationpoint()) {
+      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
       //   exit from construct;
       // }
       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
@@ -6314,6 +6315,8 @@ void CGOpenMPRuntime::emitCancellationPointCall(
       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
       CGF.EmitBlock(ExitBB);
+      if (CancelRegion == OMPD_parallel)
+        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
       // exit from construct;
       CodeGenFunction::JumpDest CancelDest =
           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
@@ -6343,6 +6346,7 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
       llvm::Value *Result = CGF.EmitRuntimeCall(
           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
       // if (__kmpc_cancel()) {
+      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
       //   exit from construct;
       // }
       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
@@ -6350,6 +6354,8 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
       CGF.EmitBlock(ExitBB);
+      if (CancelRegion == OMPD_parallel)
+        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
       // exit from construct;
       CodeGenFunction::JumpDest CancelDest =
           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());

diff  --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp
index 358c121fb38ef..eae4bdbaf7a7f 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -115,7 +115,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK1-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
@@ -149,15 +149,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK1:       cancel.cont:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]])
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1
 // CHECK1-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1
@@ -183,7 +183,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1:       .cancel.exit10:
 // CHECK1-NEXT:    br label [[CANCEL_EXIT19:%.*]]
 // CHECK1:       cancel.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT]]
 // CHECK1:       .cancel.continue11:
 // CHECK1-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
@@ -203,10 +203,10 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND6]]
 // CHECK1:       omp.inner.for.end18:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT20:%.*]]
 // CHECK1:       cancel.cont20:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK1-NEXT:    store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
@@ -224,7 +224,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
 // CHECK1-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]]
@@ -261,7 +261,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1:       .cancel.exit29:
 // CHECK1-NEXT:    br label [[CANCEL_EXIT34:%.*]]
 // CHECK1:       cancel.exit19:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT20]]
 // CHECK1:       .cancel.continue30:
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -279,15 +279,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1:       omp.inner.for.end33:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       cancel.exit34:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT35:%.*]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    br label [[CANCEL_CONT35]]
 // CHECK1:       cancel.cont35:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
 // CHECK1-NEXT:    [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates*
 // CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0
@@ -323,35 +323,38 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
 // CHECK1-NEXT:    br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK1:       .cancel.exit:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// CHECK1-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]])
 // CHECK1-NEXT:    br label [[RETURN:%.*]]
 // CHECK1:       .cancel.continue:
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK1:       omp_if.else:
 // CHECK1-NEXT:    br label [[OMP_IF_END]]
 // CHECK1:       omp_if.end:
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4
-// CHECK1-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP7]] to i8
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i8**, i8*** [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 0
-// CHECK1-NEXT:    [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 0
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP10]] to i8
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
 // CHECK1-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
-// CHECK1-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
-// CHECK1-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]])
-// CHECK1-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
-// CHECK1-NEXT:    br i1 [[TMP13]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]])
+// CHECK1-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+// CHECK1-NEXT:    br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
 // CHECK1:       .cancel.exit2:
 // CHECK1-NEXT:    br label [[RETURN]]
 // CHECK1:       .cancel.continue3:
-// CHECK1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP1]], align 4
-// CHECK1-NEXT:    [[TMP15:%.*]] = load i8**, i8*** [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP15]], i64 0
-// CHECK1-NEXT:    [[TMP16:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP16]], i64 0
-// CHECK1-NEXT:    [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
-// CHECK1-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP17]] to i32
-// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP14]]
+// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP18]], i64 0
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
+// CHECK1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 0
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
+// CHECK1-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP20]] to i32
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]]
 // CHECK1-NEXT:    [[CONV7:%.*]] = trunc i32 [[ADD]] to i8
 // CHECK1-NEXT:    store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1
 // CHECK1-NEXT:    br label [[RETURN]]
@@ -425,7 +428,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
@@ -459,12 +462,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK1:       cancel.cont:
 // CHECK1-NEXT:    ret void
 // CHECK1:       cancel.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -486,7 +489,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
 // CHECK1-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
@@ -529,12 +532,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK1:       cancel.cont:
 // CHECK1-NEXT:    ret void
 // CHECK1:       cancel.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -583,7 +586,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 0, i32* [[R3]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
@@ -635,7 +638,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP24]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP24]])
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP26:%.*]] = bitcast i32* [[R3]] to i8*
 // CHECK1-NEXT:    store i8* [[TMP26]], i8** [[TMP25]], align 8
@@ -657,7 +660,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1:       cancel.exit:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP34]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP34]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK1:       .omp.reduction.case2:
 // CHECK1-NEXT:    [[TMP35:%.*]] = load i32, i32* [[R3]], align 4
@@ -732,7 +735,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK2-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
 // CHECK2-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
@@ -766,15 +769,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK2:       cancel.cont:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]])
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
 // CHECK2-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1
 // CHECK2-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1
@@ -800,7 +803,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2:       .cancel.exit10:
 // CHECK2-NEXT:    br label [[CANCEL_EXIT19:%.*]]
 // CHECK2:       cancel.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT]]
 // CHECK2:       .cancel.continue11:
 // CHECK2-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
@@ -820,10 +823,10 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND6]]
 // CHECK2:       omp.inner.for.end18:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT20:%.*]]
 // CHECK2:       cancel.cont20:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK2-NEXT:    store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
@@ -841,7 +844,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
 // CHECK2-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]]
@@ -878,7 +881,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2:       .cancel.exit29:
 // CHECK2-NEXT:    br label [[CANCEL_EXIT34:%.*]]
 // CHECK2:       cancel.exit19:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT20]]
 // CHECK2:       .cancel.continue30:
 // CHECK2-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -896,15 +899,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2:       omp.inner.for.end33:
 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK2:       omp.loop.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       cancel.exit34:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT35:%.*]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    br label [[CANCEL_CONT35]]
 // CHECK2:       cancel.cont35:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
 // CHECK2-NEXT:    [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates*
 // CHECK2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0
@@ -940,35 +943,38 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
 // CHECK2-NEXT:    br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK2:       .cancel.exit:
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]])
 // CHECK2-NEXT:    br label [[RETURN:%.*]]
 // CHECK2:       .cancel.continue:
 // CHECK2-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK2:       omp_if.else:
 // CHECK2-NEXT:    br label [[OMP_IF_END]]
 // CHECK2:       omp_if.end:
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4
-// CHECK2-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP7]] to i8
-// CHECK2-NEXT:    [[TMP8:%.*]] = load i8**, i8*** [[TMP0]], align 8
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 0
-// CHECK2-NEXT:    [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 0
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK2-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP10]] to i8
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
 // CHECK2-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
-// CHECK2-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
-// CHECK2-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]])
-// CHECK2-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
-// CHECK2-NEXT:    br i1 [[TMP13]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]])
+// CHECK2-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+// CHECK2-NEXT:    br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
 // CHECK2:       .cancel.exit2:
 // CHECK2-NEXT:    br label [[RETURN]]
 // CHECK2:       .cancel.continue3:
-// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP1]], align 4
-// CHECK2-NEXT:    [[TMP15:%.*]] = load i8**, i8*** [[TMP0]], align 8
-// CHECK2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP15]], i64 0
-// CHECK2-NEXT:    [[TMP16:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
-// CHECK2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP16]], i64 0
-// CHECK2-NEXT:    [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
-// CHECK2-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP17]] to i32
-// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP14]]
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// CHECK2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP18]], i64 0
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
+// CHECK2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 0
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
+// CHECK2-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP20]] to i32
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]]
 // CHECK2-NEXT:    [[CONV7:%.*]] = trunc i32 [[ADD]] to i8
 // CHECK2-NEXT:    store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1
 // CHECK2-NEXT:    br label [[RETURN]]
@@ -1042,7 +1048,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK2-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
 // CHECK2-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
@@ -1076,12 +1082,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK2:       cancel.cont:
 // CHECK2-NEXT:    ret void
 // CHECK2:       cancel.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -1103,7 +1109,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK2-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
 // CHECK2-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
@@ -1146,12 +1152,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK2:       cancel.cont:
 // CHECK2-NEXT:    ret void
 // CHECK2:       cancel.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -1200,7 +1206,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 0, i32* [[R3]], align 4
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK2-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
@@ -1252,7 +1258,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP24]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP24]])
 // CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK2-NEXT:    [[TMP26:%.*]] = bitcast i32* [[R3]] to i8*
 // CHECK2-NEXT:    store i8* [[TMP26]], i8** [[TMP25]], align 8
@@ -1274,7 +1280,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2:       cancel.exit:
 // CHECK2-NEXT:    [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP34]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP34]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK2:       .omp.reduction.case2:
 // CHECK2-NEXT:    [[TMP35:%.*]] = load i32, i32* [[R3]], align 4
@@ -1322,20 +1328,20 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[P_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[P_LASTITER26:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[P_LOWERBOUND27:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[P_UPPERBOUND28:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[P_STRIDE29:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[P_LASTITER27:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[DOTCAPTURE_EXPR_33:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[I35:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[I36:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
 // CHECK3-NEXT:    [[R:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    store i32 0, i32* [[RETVAL]], align 4
@@ -1354,8 +1360,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    store i32 0, i32* [[P_LOWERBOUND]], align 4
 // CHECK3-NEXT:    store i32 0, i32* [[P_UPPERBOUND]], align 4
 // CHECK3-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
 // CHECK3-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
@@ -1378,75 +1384,75 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_HEADER]]
 // CHECK3:       omp_section_loop.exit:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]])
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_AFTER:%.*]]
 // CHECK3:       omp_section_loop.after:
 // CHECK3-NEXT:    br label [[OMP_SECTIONS_END:%.*]]
 // CHECK3:       omp_sections.end:
-// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER12:%.*]]
-// CHECK3:       omp_section_loop.preheader12:
-// CHECK3-NEXT:    store i32 0, i32* [[P_LOWERBOUND27]], align 4
-// CHECK3-NEXT:    store i32 1, i32* [[P_UPPERBOUND28]], align 4
-// CHECK3-NEXT:    store i32 1, i32* [[P_STRIDE29]], align 4
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM30:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]], i32 34, i32* [[P_LASTITER26]], i32* [[P_LOWERBOUND27]], i32* [[P_UPPERBOUND28]], i32* [[P_STRIDE29]], i32 1, i32 1)
-// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND27]], align 4
-// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND28]], align 4
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]]
+// CHECK3:       omp_section_loop.preheader13:
+// CHECK3-NEXT:    store i32 0, i32* [[P_LOWERBOUND28]], align 4
+// CHECK3-NEXT:    store i32 1, i32* [[P_UPPERBOUND29]], align 4
+// CHECK3-NEXT:    store i32 1, i32* [[P_STRIDE30]], align 4
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1)
+// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4
+// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
 // CHECK3-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 1
-// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_HEADER13:%.*]]
-// CHECK3:       omp_section_loop.header13:
-// CHECK3-NEXT:    [[OMP_SECTION_LOOP_IV19:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER12]] ], [ [[OMP_SECTION_LOOP_NEXT21:%.*]], [[OMP_SECTION_LOOP_INC16:%.*]] ]
-// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_COND14:%.*]]
-// CHECK3:       omp_section_loop.cond14:
-// CHECK3-NEXT:    [[OMP_SECTION_LOOP_CMP20:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV19]], [[TMP10]]
-// CHECK3-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP20]], label [[OMP_SECTION_LOOP_BODY15:%.*]], label [[OMP_SECTION_LOOP_EXIT17:%.*]]
-// CHECK3:       omp_section_loop.body15:
-// CHECK3-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV19]], [[TMP7]]
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14:%.*]]
+// CHECK3:       omp_section_loop.header14:
+// CHECK3-NEXT:    [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ]
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_COND15:%.*]]
+// CHECK3:       omp_section_loop.cond15:
+// CHECK3-NEXT:    [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]]
+// CHECK3-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]]
+// CHECK3:       omp_section_loop.body16:
+// CHECK3-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]]
 // CHECK3-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], 1
 // CHECK3-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], 0
-// CHECK3-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC16]] [
-// CHECK3-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE22:%.*]]
-// CHECK3-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]]
+// CHECK3-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [
+// CHECK3-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]]
+// CHECK3-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]]
 // CHECK3-NEXT:    ]
-// CHECK3:       omp_section_loop.inc16:
-// CHECK3-NEXT:    [[OMP_SECTION_LOOP_NEXT21]] = add nuw i32 [[OMP_SECTION_LOOP_IV19]], 1
-// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_HEADER13]]
-// CHECK3:       omp_section_loop.exit17:
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]])
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
-// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_AFTER18:%.*]]
-// CHECK3:       omp_section_loop.after18:
-// CHECK3-NEXT:    br label [[OMP_SECTIONS_END32:%.*]]
-// CHECK3:       omp_sections.end32:
+// CHECK3:       omp_section_loop.inc17:
+// CHECK3-NEXT:    [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14]]
+// CHECK3:       omp_section_loop.exit18:
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]])
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_AFTER19:%.*]]
+// CHECK3:       omp_section_loop.after19:
+// CHECK3-NEXT:    br label [[OMP_SECTIONS_END33:%.*]]
+// CHECK3:       omp_sections.end33:
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK3-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0
 // CHECK3-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK3-NEXT:    [[SUB34:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT:    store i32 [[SUB34]], i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK3-NEXT:    [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK3-NEXT:    store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK3-NEXT:    store i32 0, i32* [[I]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK3-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP16]]
 // CHECK3-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
 // CHECK3:       omp.precond.then:
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
-// CHECK3-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK3-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK3-NEXT:    store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM36:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
-// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM36]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK3-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-// CHECK3-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
-// CHECK3-NEXT:    [[CMP37:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
-// CHECK3-NEXT:    br i1 [[CMP37]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK3-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
+// CHECK3-NEXT:    [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
+// CHECK3-NEXT:    br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK3:       cond.true:
-// CHECK3-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK3-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK3-NEXT:    br label [[COND_END:%.*]]
 // CHECK3:       cond.false:
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
@@ -1460,50 +1466,50 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3:       omp.inner.for.cond:
 // CHECK3-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-// CHECK3-NEXT:    [[CMP38:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
-// CHECK3-NEXT:    br i1 [[CMP38]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK3-NEXT:    [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
+// CHECK3-NEXT:    br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK3:       omp.inner.for.body:
 // CHECK3-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
-// CHECK3-NEXT:    [[ADD39:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT:    store i32 [[ADD39]], i32* [[I35]], align 4
+// CHECK3-NEXT:    [[ADD40:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK3-NEXT:    store i32 [[ADD40]], i32* [[I36]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = load float, float* @flag, align 4
-// CHECK3-NEXT:    [[TOBOOL40:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
-// CHECK3-NEXT:    br i1 [[TOBOOL40]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
+// CHECK3-NEXT:    br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
-// CHECK3-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]], i32 2)
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
+// CHECK3-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2)
 // CHECK3-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
 // CHECK3-NEXT:    br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK3:       .cancel.exit:
 // CHECK3-NEXT:    br label [[CANCEL_EXIT:%.*]]
 // CHECK3:       omp_section_loop.body.case:
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9]], i32 3)
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3)
 // CHECK3-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0
 // CHECK3-NEXT:    br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
 // CHECK3:       omp_section_loop.body.case.split:
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
 // CHECK3:       omp_section_loop.body.case.cncl:
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
-// CHECK3:       omp_section_loop.body.case22:
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM23:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]], i32 3)
+// CHECK3:       omp_section_loop.body.case23:
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3)
 // CHECK3-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
-// CHECK3-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE22_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE22_CNCL:%.*]]
-// CHECK3:       omp_section_loop.body.case22.split:
-// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK3:       omp_section_loop.body.case22.cncl:
-// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK3:       omp_section_loop.body.case24:
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3)
+// CHECK3-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]]
+// CHECK3:       omp_section_loop.body.case23.split:
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK3:       omp_section_loop.body.case23.cncl:
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK3:       omp_section_loop.body.case25:
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3)
 // CHECK3-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0
-// CHECK3-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE24_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]]
-// CHECK3:       omp_section_loop.body.case24.split:
-// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK3:       omp_section_loop.body.case24.cncl:
-// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
+// CHECK3-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]]
+// CHECK3:       omp_section_loop.body.case25.split:
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK3:       omp_section_loop.body.case25.cncl:
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
 // CHECK3:       .cancel.continue:
 // CHECK3-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK3:       omp_if.else:
@@ -1514,30 +1520,30 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK3:       omp.inner.for.inc:
 // CHECK3-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
-// CHECK3-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP35]], 1
-// CHECK3-NEXT:    store i32 [[ADD42]], i32* [[DOTOMP_IV]], align 4
+// CHECK3-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK3-NEXT:    store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       cancel.exit:
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM43:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
-// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM43]])
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
+// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
 // CHECK3-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK3:       omp.precond.end:
 // CHECK3-NEXT:    br label [[CANCEL_CONT]]
 // CHECK3:       cancel.cont:
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
-// CHECK3-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM46]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]])
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
+// CHECK3-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
 // CHECK3-NEXT:    [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates*
 // CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
-// CHECK3-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i8* [[TMP36]])
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
+// CHECK3-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]])
 // CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
 // CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
 // CHECK3-NEXT:    store i32 0, i32* [[R]], align 4
@@ -1568,25 +1574,25 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
 // CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
-// CHECK3-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX2]], align 1
-// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+// CHECK3-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
+// CHECK3-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 // CHECK3-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
-// CHECK3-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL4:%.*]]
-// CHECK3:       .cncl4:
+// CHECK3-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
+// CHECK3:       .cncl5:
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 // CHECK3:       .cont:
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
-// CHECK3-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
-// CHECK3-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1
-// CHECK3-NEXT:    [[CONV7:%.*]] = sext i8 [[TMP12]] to i32
-// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV7]], [[TMP9]]
-// CHECK3-NEXT:    [[CONV8:%.*]] = trunc i32 [[ADD]] to i8
-// CHECK3-NEXT:    store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1
+// CHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
+// CHECK3-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
+// CHECK3-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
+// CHECK3-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
+// CHECK3-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
+// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
+// CHECK3-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
+// CHECK3-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
 // CHECK3-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK3:       omp.par.pre_finalize:
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
@@ -1596,6 +1602,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
 // CHECK3-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
 // CHECK3:       .cncl:
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK3:       .split:
 // CHECK3-NEXT:    br label [[TMP3]]
@@ -1947,20 +1955,20 @@ for (int i = 0; i < argc; ++i) {
 // CHECK4-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[P_STRIDE:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[P_LASTITER26:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[P_LOWERBOUND27:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[P_UPPERBOUND28:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[P_STRIDE29:%.*]] = alloca i32, align 4
+// CHECK4-NEXT:    [[P_LASTITER27:%.*]] = alloca i32, align 4
+// CHECK4-NEXT:    [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
+// CHECK4-NEXT:    [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
+// CHECK4-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[DOTCAPTURE_EXPR_33:%.*]] = alloca i32, align 4
+// CHECK4-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[I35:%.*]] = alloca i32, align 4
+// CHECK4-NEXT:    [[I36:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
 // CHECK4-NEXT:    [[R:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    store i32 0, i32* [[RETVAL]], align 4
@@ -1979,8 +1987,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK4-NEXT:    store i32 0, i32* [[P_LOWERBOUND]], align 4
 // CHECK4-NEXT:    store i32 0, i32* [[P_UPPERBOUND]], align 4
 // CHECK4-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
 // CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
 // CHECK4-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
@@ -2003,75 +2011,75 @@ for (int i = 0; i < argc; ++i) {
 // CHECK4-NEXT:    [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
 // CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_HEADER]]
 // CHECK4:       omp_section_loop.exit:
-// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]])
 // CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_AFTER:%.*]]
 // CHECK4:       omp_section_loop.after:
 // CHECK4-NEXT:    br label [[OMP_SECTIONS_END:%.*]]
 // CHECK4:       omp_sections.end:
-// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER12:%.*]]
-// CHECK4:       omp_section_loop.preheader12:
-// CHECK4-NEXT:    store i32 0, i32* [[P_LOWERBOUND27]], align 4
-// CHECK4-NEXT:    store i32 1, i32* [[P_UPPERBOUND28]], align 4
-// CHECK4-NEXT:    store i32 1, i32* [[P_STRIDE29]], align 4
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM30:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]], i32 34, i32* [[P_LASTITER26]], i32* [[P_LOWERBOUND27]], i32* [[P_UPPERBOUND28]], i32* [[P_STRIDE29]], i32 1, i32 1)
-// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND27]], align 4
-// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND28]], align 4
+// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]]
+// CHECK4:       omp_section_loop.preheader13:
+// CHECK4-NEXT:    store i32 0, i32* [[P_LOWERBOUND28]], align 4
+// CHECK4-NEXT:    store i32 1, i32* [[P_UPPERBOUND29]], align 4
+// CHECK4-NEXT:    store i32 1, i32* [[P_STRIDE30]], align 4
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1)
+// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4
+// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4
 // CHECK4-NEXT:    [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
 // CHECK4-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 1
-// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_HEADER13:%.*]]
-// CHECK4:       omp_section_loop.header13:
-// CHECK4-NEXT:    [[OMP_SECTION_LOOP_IV19:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER12]] ], [ [[OMP_SECTION_LOOP_NEXT21:%.*]], [[OMP_SECTION_LOOP_INC16:%.*]] ]
-// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_COND14:%.*]]
-// CHECK4:       omp_section_loop.cond14:
-// CHECK4-NEXT:    [[OMP_SECTION_LOOP_CMP20:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV19]], [[TMP10]]
-// CHECK4-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP20]], label [[OMP_SECTION_LOOP_BODY15:%.*]], label [[OMP_SECTION_LOOP_EXIT17:%.*]]
-// CHECK4:       omp_section_loop.body15:
-// CHECK4-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV19]], [[TMP7]]
+// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14:%.*]]
+// CHECK4:       omp_section_loop.header14:
+// CHECK4-NEXT:    [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ]
+// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_COND15:%.*]]
+// CHECK4:       omp_section_loop.cond15:
+// CHECK4-NEXT:    [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]]
+// CHECK4-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]]
+// CHECK4:       omp_section_loop.body16:
+// CHECK4-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]]
 // CHECK4-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], 1
 // CHECK4-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], 0
-// CHECK4-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC16]] [
-// CHECK4-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE22:%.*]]
-// CHECK4-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]]
+// CHECK4-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [
+// CHECK4-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]]
+// CHECK4-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]]
 // CHECK4-NEXT:    ]
-// CHECK4:       omp_section_loop.inc16:
-// CHECK4-NEXT:    [[OMP_SECTION_LOOP_NEXT21]] = add nuw i32 [[OMP_SECTION_LOOP_IV19]], 1
-// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_HEADER13]]
-// CHECK4:       omp_section_loop.exit17:
-// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]])
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
-// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_AFTER18:%.*]]
-// CHECK4:       omp_section_loop.after18:
-// CHECK4-NEXT:    br label [[OMP_SECTIONS_END32:%.*]]
-// CHECK4:       omp_sections.end32:
+// CHECK4:       omp_section_loop.inc17:
+// CHECK4-NEXT:    [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1
+// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14]]
+// CHECK4:       omp_section_loop.exit18:
+// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]])
+// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_AFTER19:%.*]]
+// CHECK4:       omp_section_loop.after19:
+// CHECK4-NEXT:    br label [[OMP_SECTIONS_END33:%.*]]
+// CHECK4:       omp_sections.end33:
 // CHECK4-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK4-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK4-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0
 // CHECK4-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK4-NEXT:    [[SUB34:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK4-NEXT:    store i32 [[SUB34]], i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK4-NEXT:    [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK4-NEXT:    store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK4-NEXT:    store i32 0, i32* [[I]], align 4
 // CHECK4-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK4-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP16]]
 // CHECK4-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
 // CHECK4:       omp.precond.then:
 // CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
-// CHECK4-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK4-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK4-NEXT:    store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4
 // CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM36:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
-// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM36]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK4-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-// CHECK4-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
-// CHECK4-NEXT:    [[CMP37:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
-// CHECK4-NEXT:    br i1 [[CMP37]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK4-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
+// CHECK4-NEXT:    [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
+// CHECK4-NEXT:    br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK4:       cond.true:
-// CHECK4-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK4-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK4-NEXT:    br label [[COND_END:%.*]]
 // CHECK4:       cond.false:
 // CHECK4-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
@@ -2085,50 +2093,50 @@ for (int i = 0; i < argc; ++i) {
 // CHECK4:       omp.inner.for.cond:
 // CHECK4-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK4-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-// CHECK4-NEXT:    [[CMP38:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
-// CHECK4-NEXT:    br i1 [[CMP38]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK4-NEXT:    [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
+// CHECK4-NEXT:    br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK4:       omp.inner.for.body:
 // CHECK4-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK4-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
-// CHECK4-NEXT:    [[ADD39:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK4-NEXT:    store i32 [[ADD39]], i32* [[I35]], align 4
+// CHECK4-NEXT:    [[ADD40:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK4-NEXT:    store i32 [[ADD40]], i32* [[I36]], align 4
 // CHECK4-NEXT:    [[TMP26:%.*]] = load float, float* @flag, align 4
-// CHECK4-NEXT:    [[TOBOOL40:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
-// CHECK4-NEXT:    br i1 [[TOBOOL40]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK4-NEXT:    [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
+// CHECK4-NEXT:    br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK4:       omp_if.then:
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
-// CHECK4-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]], i32 2)
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
+// CHECK4-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2)
 // CHECK4-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
 // CHECK4-NEXT:    br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK4:       .cancel.exit:
 // CHECK4-NEXT:    br label [[CANCEL_EXIT:%.*]]
 // CHECK4:       omp_section_loop.body.case:
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9]], i32 3)
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3)
 // CHECK4-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0
 // CHECK4-NEXT:    br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
 // CHECK4:       omp_section_loop.body.case.split:
 // CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
 // CHECK4:       omp_section_loop.body.case.cncl:
 // CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
-// CHECK4:       omp_section_loop.body.case22:
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM23:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]], i32 3)
+// CHECK4:       omp_section_loop.body.case23:
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3)
 // CHECK4-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
-// CHECK4-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE22_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE22_CNCL:%.*]]
-// CHECK4:       omp_section_loop.body.case22.split:
-// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK4:       omp_section_loop.body.case22.cncl:
-// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK4:       omp_section_loop.body.case24:
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3)
+// CHECK4-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]]
+// CHECK4:       omp_section_loop.body.case23.split:
+// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK4:       omp_section_loop.body.case23.cncl:
+// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK4:       omp_section_loop.body.case25:
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3)
 // CHECK4-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0
-// CHECK4-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE24_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]]
-// CHECK4:       omp_section_loop.body.case24.split:
-// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK4:       omp_section_loop.body.case24.cncl:
-// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
+// CHECK4-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]]
+// CHECK4:       omp_section_loop.body.case25.split:
+// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK4:       omp_section_loop.body.case25.cncl:
+// CHECK4-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
 // CHECK4:       .cancel.continue:
 // CHECK4-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK4:       omp_if.else:
@@ -2139,30 +2147,30 @@ for (int i = 0; i < argc; ++i) {
 // CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK4:       omp.inner.for.inc:
 // CHECK4-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
-// CHECK4-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP35]], 1
-// CHECK4-NEXT:    store i32 [[ADD42]], i32* [[DOTOMP_IV]], align 4
+// CHECK4-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK4-NEXT:    store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4
 // CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK4:       omp.inner.for.end:
 // CHECK4-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK4:       omp.loop.exit:
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
-// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
 // CHECK4-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK4:       cancel.exit:
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM43:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
-// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM43]])
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
+// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
 // CHECK4-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK4:       omp.precond.end:
 // CHECK4-NEXT:    br label [[CANCEL_CONT]]
 // CHECK4:       cancel.cont:
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
-// CHECK4-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM46]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]])
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
+// CHECK4-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
 // CHECK4-NEXT:    [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates*
 // CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
-// CHECK4-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i8* [[TMP36]])
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
+// CHECK4-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]])
 // CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
 // CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
 // CHECK4-NEXT:    store i32 0, i32* [[R]], align 4
@@ -2193,25 +2201,25 @@ for (int i = 0; i < argc; ++i) {
 // CHECK4-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
 // CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
 // CHECK4-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
-// CHECK4-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX2]], align 1
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+// CHECK4-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
+// CHECK4-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 // CHECK4-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
-// CHECK4-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL4:%.*]]
-// CHECK4:       .cncl4:
+// CHECK4-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
+// CHECK4:       .cncl5:
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 // CHECK4:       .cont:
 // CHECK4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK4-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK4-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
-// CHECK4-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8
-// CHECK4-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
-// CHECK4-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1
-// CHECK4-NEXT:    [[CONV7:%.*]] = sext i8 [[TMP12]] to i32
-// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV7]], [[TMP9]]
-// CHECK4-NEXT:    [[CONV8:%.*]] = trunc i32 [[ADD]] to i8
-// CHECK4-NEXT:    store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1
+// CHECK4-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
+// CHECK4-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
+// CHECK4-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
+// CHECK4-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
+// CHECK4-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
+// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
+// CHECK4-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
+// CHECK4-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
 // CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK4:       omp.par.pre_finalize:
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
@@ -2221,6 +2229,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK4-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
 // CHECK4-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
 // CHECK4:       .cncl:
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK4:       .split:
 // CHECK4-NEXT:    br label [[TMP3]]
@@ -2599,7 +2609,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
 // CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK7-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK7-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
 // CHECK7-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
@@ -2633,15 +2643,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK7:       omp.inner.for.end:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK7-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK7:       cancel.cont:
-// CHECK7-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK7-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]])
 // CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
 // CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
 // CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4
 // CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
+// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
 // CHECK7-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
 // CHECK7-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1
 // CHECK7-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1
@@ -2667,7 +2677,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7:       .cancel.exit10:
 // CHECK7-NEXT:    br label [[CANCEL_EXIT19:%.*]]
 // CHECK7:       cancel.exit:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK7-NEXT:    br label [[CANCEL_CONT]]
 // CHECK7:       .cancel.continue11:
 // CHECK7-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
@@ -2687,10 +2697,10 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7-NEXT:    store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
 // CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND6]]
 // CHECK7:       omp.inner.for.end18:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK7-NEXT:    br label [[CANCEL_CONT20:%.*]]
 // CHECK7:       cancel.cont20:
-// CHECK7-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK7-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
 // CHECK7-NEXT:    [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK7-NEXT:    store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK7-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
@@ -2708,7 +2718,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7-NEXT:    store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4
 // CHECK7-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK7-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK7-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
 // CHECK7-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]]
@@ -2745,7 +2755,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7:       .cancel.exit29:
 // CHECK7-NEXT:    br label [[CANCEL_EXIT34:%.*]]
 // CHECK7:       cancel.exit19:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK7-NEXT:    br label [[CANCEL_CONT20]]
 // CHECK7:       .cancel.continue30:
 // CHECK7-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -2763,15 +2773,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7:       omp.inner.for.end33:
 // CHECK7-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK7:       omp.loop.exit:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
 // CHECK7-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK7:       cancel.exit34:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
 // CHECK7-NEXT:    br label [[CANCEL_CONT35:%.*]]
 // CHECK7:       omp.precond.end:
 // CHECK7-NEXT:    br label [[CANCEL_CONT35]]
 // CHECK7:       cancel.cont35:
-// CHECK7-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]])
+// CHECK7-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK7-NEXT:    [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
 // CHECK7-NEXT:    [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates*
 // CHECK7-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0
@@ -2807,35 +2817,38 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
 // CHECK7-NEXT:    br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK7:       .cancel.exit:
+// CHECK7-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK7-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// CHECK7-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]])
 // CHECK7-NEXT:    br label [[RETURN:%.*]]
 // CHECK7:       .cancel.continue:
 // CHECK7-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK7:       omp_if.else:
 // CHECK7-NEXT:    br label [[OMP_IF_END]]
 // CHECK7:       omp_if.end:
-// CHECK7-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4
-// CHECK7-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP7]] to i8
-// CHECK7-NEXT:    [[TMP8:%.*]] = load i8**, i8*** [[TMP0]], align 8
-// CHECK7-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 0
-// CHECK7-NEXT:    [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK7-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 0
+// CHECK7-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK7-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP10]] to i8
+// CHECK7-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// CHECK7-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
+// CHECK7-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// CHECK7-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
 // CHECK7-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
-// CHECK7-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK7-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
-// CHECK7-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]])
-// CHECK7-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
-// CHECK7-NEXT:    br i1 [[TMP13]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
+// CHECK7-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK7-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+// CHECK7-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]])
+// CHECK7-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+// CHECK7-NEXT:    br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
 // CHECK7:       .cancel.exit2:
 // CHECK7-NEXT:    br label [[RETURN]]
 // CHECK7:       .cancel.continue3:
-// CHECK7-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP1]], align 4
-// CHECK7-NEXT:    [[TMP15:%.*]] = load i8**, i8*** [[TMP0]], align 8
-// CHECK7-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP15]], i64 0
-// CHECK7-NEXT:    [[TMP16:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
-// CHECK7-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP16]], i64 0
-// CHECK7-NEXT:    [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
-// CHECK7-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP17]] to i32
-// CHECK7-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP14]]
+// CHECK7-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK7-NEXT:    [[TMP18:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// CHECK7-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP18]], i64 0
+// CHECK7-NEXT:    [[TMP19:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
+// CHECK7-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 0
+// CHECK7-NEXT:    [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
+// CHECK7-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP20]] to i32
+// CHECK7-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]]
 // CHECK7-NEXT:    [[CONV7:%.*]] = trunc i32 [[ADD]] to i8
 // CHECK7-NEXT:    store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1
 // CHECK7-NEXT:    br label [[RETURN]]
@@ -2909,7 +2922,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK7-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK7-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK7-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK7-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
 // CHECK7-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
@@ -2943,12 +2956,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK7:       omp.inner.for.end:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK7-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK7:       cancel.cont:
 // CHECK7-NEXT:    ret void
 // CHECK7:       cancel.exit:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK7-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -2970,7 +2983,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK7-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK7-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK7-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK7-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
 // CHECK7-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
@@ -3013,12 +3026,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK7:       omp.inner.for.end:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK7-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK7:       cancel.cont:
 // CHECK7-NEXT:    ret void
 // CHECK7:       cancel.exit:
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK7-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -3067,7 +3080,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7-NEXT:    store i32 0, i32* [[R3]], align 4
 // CHECK7-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK7-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK7-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK7-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK7-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK7-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
@@ -3119,7 +3132,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7:       omp.loop.exit:
 // CHECK7-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK7-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP24]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP24]])
 // CHECK7-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK7-NEXT:    [[TMP26:%.*]] = bitcast i32* [[R3]] to i8*
 // CHECK7-NEXT:    store i8* [[TMP26]], i8** [[TMP25]], align 8
@@ -3141,7 +3154,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK7:       cancel.exit:
 // CHECK7-NEXT:    [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK7-NEXT:    [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
-// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP34]])
+// CHECK7-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP34]])
 // CHECK7-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK7:       .omp.reduction.case2:
 // CHECK7-NEXT:    [[TMP35:%.*]] = load i32, i32* [[R3]], align 4
@@ -3216,7 +3229,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
 // CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK8-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK8-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
 // CHECK8-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
@@ -3250,15 +3263,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK8:       omp.inner.for.end:
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK8-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK8:       cancel.cont:
-// CHECK8-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK8-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]])
 // CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
 // CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
 // CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4
 // CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
+// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
 // CHECK8-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
 // CHECK8-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1
 // CHECK8-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1
@@ -3284,7 +3297,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8:       .cancel.exit10:
 // CHECK8-NEXT:    br label [[CANCEL_EXIT19:%.*]]
 // CHECK8:       cancel.exit:
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK8-NEXT:    br label [[CANCEL_CONT]]
 // CHECK8:       .cancel.continue11:
 // CHECK8-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
@@ -3304,10 +3317,10 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8-NEXT:    store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
 // CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND6]]
 // CHECK8:       omp.inner.for.end18:
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK8-NEXT:    br label [[CANCEL_CONT20:%.*]]
 // CHECK8:       cancel.cont20:
-// CHECK8-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK8-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
 // CHECK8-NEXT:    [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK8-NEXT:    store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK8-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
@@ -3325,7 +3338,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8-NEXT:    store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4
 // CHECK8-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK8-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK8-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
 // CHECK8-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]]
@@ -3362,7 +3375,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8:       .cancel.exit29:
 // CHECK8-NEXT:    br label [[CANCEL_EXIT34:%.*]]
 // CHECK8:       cancel.exit19:
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK8-NEXT:    br label [[CANCEL_CONT20]]
 // CHECK8:       .cancel.continue30:
 // CHECK8-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -3380,15 +3393,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8:       omp.inner.for.end33:
 // CHECK8-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK8:       omp.loop.exit:
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
 // CHECK8-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK8:       cancel.exit34:
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP0]])
 // CHECK8-NEXT:    br label [[CANCEL_CONT35:%.*]]
 // CHECK8:       omp.precond.end:
 // CHECK8-NEXT:    br label [[CANCEL_CONT35]]
 // CHECK8:       cancel.cont35:
-// CHECK8-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]])
+// CHECK8-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK8-NEXT:    [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
 // CHECK8-NEXT:    [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates*
 // CHECK8-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0
@@ -3424,35 +3437,38 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
 // CHECK8-NEXT:    br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK8:       .cancel.exit:
+// CHECK8-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK8-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// CHECK8-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]])
 // CHECK8-NEXT:    br label [[RETURN:%.*]]
 // CHECK8:       .cancel.continue:
 // CHECK8-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK8:       omp_if.else:
 // CHECK8-NEXT:    br label [[OMP_IF_END]]
 // CHECK8:       omp_if.end:
-// CHECK8-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4
-// CHECK8-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP7]] to i8
-// CHECK8-NEXT:    [[TMP8:%.*]] = load i8**, i8*** [[TMP0]], align 8
-// CHECK8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 0
-// CHECK8-NEXT:    [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK8-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 0
+// CHECK8-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK8-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP10]] to i8
+// CHECK8-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// CHECK8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
+// CHECK8-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// CHECK8-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
 // CHECK8-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
-// CHECK8-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK8-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
-// CHECK8-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]])
-// CHECK8-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
-// CHECK8-NEXT:    br i1 [[TMP13]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
+// CHECK8-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK8-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+// CHECK8-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]])
+// CHECK8-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+// CHECK8-NEXT:    br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
 // CHECK8:       .cancel.exit2:
 // CHECK8-NEXT:    br label [[RETURN]]
 // CHECK8:       .cancel.continue3:
-// CHECK8-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP1]], align 4
-// CHECK8-NEXT:    [[TMP15:%.*]] = load i8**, i8*** [[TMP0]], align 8
-// CHECK8-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP15]], i64 0
-// CHECK8-NEXT:    [[TMP16:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
-// CHECK8-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP16]], i64 0
-// CHECK8-NEXT:    [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
-// CHECK8-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP17]] to i32
-// CHECK8-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP14]]
+// CHECK8-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK8-NEXT:    [[TMP18:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// CHECK8-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP18]], i64 0
+// CHECK8-NEXT:    [[TMP19:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
+// CHECK8-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 0
+// CHECK8-NEXT:    [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
+// CHECK8-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP20]] to i32
+// CHECK8-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]]
 // CHECK8-NEXT:    [[CONV7:%.*]] = trunc i32 [[ADD]] to i8
 // CHECK8-NEXT:    store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1
 // CHECK8-NEXT:    br label [[RETURN]]
@@ -3526,7 +3542,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK8-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK8-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK8-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK8-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
 // CHECK8-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
@@ -3560,12 +3576,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK8:       omp.inner.for.end:
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK8-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK8:       cancel.cont:
 // CHECK8-NEXT:    ret void
 // CHECK8:       cancel.exit:
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK8-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -3587,7 +3603,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK8-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK8-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK8-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK8-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
 // CHECK8-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
@@ -3630,12 +3646,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK8:       omp.inner.for.end:
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK8-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK8:       cancel.cont:
 // CHECK8-NEXT:    ret void
 // CHECK8:       cancel.exit:
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]])
 // CHECK8-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -3684,7 +3700,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8-NEXT:    store i32 0, i32* [[R3]], align 4
 // CHECK8-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK8-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK8-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK8-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK8-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK8-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
@@ -3736,7 +3752,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8:       omp.loop.exit:
 // CHECK8-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK8-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP24]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP24]])
 // CHECK8-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // CHECK8-NEXT:    [[TMP26:%.*]] = bitcast i32* [[R3]] to i8*
 // CHECK8-NEXT:    store i8* [[TMP26]], i8** [[TMP25]], align 8
@@ -3758,7 +3774,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK8:       cancel.exit:
 // CHECK8-NEXT:    [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK8-NEXT:    [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
-// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP34]])
+// CHECK8-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP34]])
 // CHECK8-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK8:       .omp.reduction.case2:
 // CHECK8-NEXT:    [[TMP35:%.*]] = load i32, i32* [[R3]], align 4
@@ -3806,20 +3822,20 @@ for (int i = 0; i < argc; ++i) {
 // CHECK9-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[P_STRIDE:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[P_LASTITER26:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[P_LOWERBOUND27:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[P_UPPERBOUND28:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[P_STRIDE29:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[P_LASTITER27:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_33:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[I35:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[I36:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
 // CHECK9-NEXT:    [[R:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, i32* [[RETVAL]], align 4
@@ -3838,8 +3854,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK9-NEXT:    store i32 0, i32* [[P_LOWERBOUND]], align 4
 // CHECK9-NEXT:    store i32 0, i32* [[P_UPPERBOUND]], align 4
 // CHECK9-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
 // CHECK9-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
@@ -3862,75 +3878,75 @@ for (int i = 0; i < argc; ++i) {
 // CHECK9-NEXT:    [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
 // CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_HEADER]]
 // CHECK9:       omp_section_loop.exit:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK9-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]])
 // CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_AFTER:%.*]]
 // CHECK9:       omp_section_loop.after:
 // CHECK9-NEXT:    br label [[OMP_SECTIONS_END:%.*]]
 // CHECK9:       omp_sections.end:
-// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER12:%.*]]
-// CHECK9:       omp_section_loop.preheader12:
-// CHECK9-NEXT:    store i32 0, i32* [[P_LOWERBOUND27]], align 4
-// CHECK9-NEXT:    store i32 1, i32* [[P_UPPERBOUND28]], align 4
-// CHECK9-NEXT:    store i32 1, i32* [[P_STRIDE29]], align 4
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM30:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]], i32 34, i32* [[P_LASTITER26]], i32* [[P_LOWERBOUND27]], i32* [[P_UPPERBOUND28]], i32* [[P_STRIDE29]], i32 1, i32 1)
-// CHECK9-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND27]], align 4
-// CHECK9-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND28]], align 4
+// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]]
+// CHECK9:       omp_section_loop.preheader13:
+// CHECK9-NEXT:    store i32 0, i32* [[P_LOWERBOUND28]], align 4
+// CHECK9-NEXT:    store i32 1, i32* [[P_UPPERBOUND29]], align 4
+// CHECK9-NEXT:    store i32 1, i32* [[P_STRIDE30]], align 4
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1)
+// CHECK9-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4
+// CHECK9-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4
 // CHECK9-NEXT:    [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
 // CHECK9-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 1
-// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_HEADER13:%.*]]
-// CHECK9:       omp_section_loop.header13:
-// CHECK9-NEXT:    [[OMP_SECTION_LOOP_IV19:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER12]] ], [ [[OMP_SECTION_LOOP_NEXT21:%.*]], [[OMP_SECTION_LOOP_INC16:%.*]] ]
-// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_COND14:%.*]]
-// CHECK9:       omp_section_loop.cond14:
-// CHECK9-NEXT:    [[OMP_SECTION_LOOP_CMP20:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV19]], [[TMP10]]
-// CHECK9-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP20]], label [[OMP_SECTION_LOOP_BODY15:%.*]], label [[OMP_SECTION_LOOP_EXIT17:%.*]]
-// CHECK9:       omp_section_loop.body15:
-// CHECK9-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV19]], [[TMP7]]
+// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14:%.*]]
+// CHECK9:       omp_section_loop.header14:
+// CHECK9-NEXT:    [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ]
+// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_COND15:%.*]]
+// CHECK9:       omp_section_loop.cond15:
+// CHECK9-NEXT:    [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]]
+// CHECK9-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]]
+// CHECK9:       omp_section_loop.body16:
+// CHECK9-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]]
 // CHECK9-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], 1
 // CHECK9-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], 0
-// CHECK9-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC16]] [
-// CHECK9-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE22:%.*]]
-// CHECK9-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]]
+// CHECK9-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [
+// CHECK9-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]]
+// CHECK9-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]]
 // CHECK9-NEXT:    ]
-// CHECK9:       omp_section_loop.inc16:
-// CHECK9-NEXT:    [[OMP_SECTION_LOOP_NEXT21]] = add nuw i32 [[OMP_SECTION_LOOP_IV19]], 1
-// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_HEADER13]]
-// CHECK9:       omp_section_loop.exit17:
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]])
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
-// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_AFTER18:%.*]]
-// CHECK9:       omp_section_loop.after18:
-// CHECK9-NEXT:    br label [[OMP_SECTIONS_END32:%.*]]
-// CHECK9:       omp_sections.end32:
+// CHECK9:       omp_section_loop.inc17:
+// CHECK9-NEXT:    [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1
+// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14]]
+// CHECK9:       omp_section_loop.exit18:
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK9-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]])
+// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_AFTER19:%.*]]
+// CHECK9:       omp_section_loop.after19:
+// CHECK9-NEXT:    br label [[OMP_SECTIONS_END33:%.*]]
+// CHECK9:       omp_sections.end33:
 // CHECK9-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK9-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK9-NEXT:    [[SUB34:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK9-NEXT:    store i32 [[SUB34]], i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK9-NEXT:    [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK9-NEXT:    store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK9-NEXT:    store i32 0, i32* [[I]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK9-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP16]]
 // CHECK9-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
 // CHECK9:       omp.precond.then:
 // CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
-// CHECK9-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK9-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK9-NEXT:    store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4
 // CHECK9-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK9-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM36:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
-// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM36]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK9-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-// CHECK9-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
-// CHECK9-NEXT:    [[CMP37:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
-// CHECK9-NEXT:    br i1 [[CMP37]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK9-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
+// CHECK9-NEXT:    [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
+// CHECK9-NEXT:    br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK9:       cond.true:
-// CHECK9-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK9-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK9-NEXT:    br label [[COND_END:%.*]]
 // CHECK9:       cond.false:
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
@@ -3944,50 +3960,50 @@ for (int i = 0; i < argc; ++i) {
 // CHECK9:       omp.inner.for.cond:
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-// CHECK9-NEXT:    [[CMP38:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
-// CHECK9-NEXT:    br i1 [[CMP38]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK9-NEXT:    [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
+// CHECK9-NEXT:    br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK9:       omp.inner.for.body:
 // CHECK9-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
-// CHECK9-NEXT:    [[ADD39:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK9-NEXT:    store i32 [[ADD39]], i32* [[I35]], align 4
+// CHECK9-NEXT:    [[ADD40:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK9-NEXT:    store i32 [[ADD40]], i32* [[I36]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = load float, float* @flag, align 4
-// CHECK9-NEXT:    [[TOBOOL40:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
-// CHECK9-NEXT:    br i1 [[TOBOOL40]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK9-NEXT:    [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
+// CHECK9-NEXT:    br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK9:       omp_if.then:
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
-// CHECK9-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]], i32 2)
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
+// CHECK9-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2)
 // CHECK9-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
 // CHECK9-NEXT:    br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK9:       .cancel.exit:
 // CHECK9-NEXT:    br label [[CANCEL_EXIT:%.*]]
 // CHECK9:       omp_section_loop.body.case:
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9]], i32 3)
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK9-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3)
 // CHECK9-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0
 // CHECK9-NEXT:    br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
 // CHECK9:       omp_section_loop.body.case.split:
 // CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
 // CHECK9:       omp_section_loop.body.case.cncl:
 // CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
-// CHECK9:       omp_section_loop.body.case22:
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM23:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]], i32 3)
+// CHECK9:       omp_section_loop.body.case23:
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3)
 // CHECK9-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE22_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE22_CNCL:%.*]]
-// CHECK9:       omp_section_loop.body.case22.split:
-// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK9:       omp_section_loop.body.case22.cncl:
-// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK9:       omp_section_loop.body.case24:
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3)
+// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]]
+// CHECK9:       omp_section_loop.body.case23.split:
+// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK9:       omp_section_loop.body.case23.cncl:
+// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK9:       omp_section_loop.body.case25:
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK9-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3)
 // CHECK9-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0
-// CHECK9-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE24_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]]
-// CHECK9:       omp_section_loop.body.case24.split:
-// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK9:       omp_section_loop.body.case24.cncl:
-// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
+// CHECK9-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]]
+// CHECK9:       omp_section_loop.body.case25.split:
+// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK9:       omp_section_loop.body.case25.cncl:
+// CHECK9-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
 // CHECK9:       .cancel.continue:
 // CHECK9-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK9:       omp_if.else:
@@ -3998,30 +4014,30 @@ for (int i = 0; i < argc; ++i) {
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
-// CHECK9-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP35]], 1
-// CHECK9-NEXT:    store i32 [[ADD42]], i32* [[DOTOMP_IV]], align 4
+// CHECK9-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK9-NEXT:    store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK9:       omp.inner.for.end:
 // CHECK9-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK9:       omp.loop.exit:
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
 // CHECK9-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK9:       cancel.exit:
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM43:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
-// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM43]])
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
+// CHECK9-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
 // CHECK9-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK9:       omp.precond.end:
 // CHECK9-NEXT:    br label [[CANCEL_CONT]]
 // CHECK9:       cancel.cont:
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
-// CHECK9-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM46]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK9-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]])
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
+// CHECK9-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
 // CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates*
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
-// CHECK9-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i8* [[TMP36]])
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
+// CHECK9-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]])
 // CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
 // CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
 // CHECK9-NEXT:    store i32 0, i32* [[R]], align 4
@@ -4052,25 +4068,25 @@ for (int i = 0; i < argc; ++i) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
 // CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK9-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
-// CHECK9-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX2]], align 1
-// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+// CHECK9-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
+// CHECK9-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
-// CHECK9-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL4:%.*]]
-// CHECK9:       .cncl4:
+// CHECK9-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
+// CHECK9:       .cncl5:
 // CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 // CHECK9:       .cont:
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK9-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
-// CHECK9-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8
-// CHECK9-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
-// CHECK9-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1
-// CHECK9-NEXT:    [[CONV7:%.*]] = sext i8 [[TMP12]] to i32
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV7]], [[TMP9]]
-// CHECK9-NEXT:    [[CONV8:%.*]] = trunc i32 [[ADD]] to i8
-// CHECK9-NEXT:    store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1
+// CHECK9-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
+// CHECK9-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
+// CHECK9-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
+// CHECK9-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
+// CHECK9-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
+// CHECK9-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
+// CHECK9-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
 // CHECK9-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK9:       omp.par.pre_finalize:
 // CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
@@ -4080,6 +4096,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK9-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
 // CHECK9-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
 // CHECK9:       .cncl:
+// CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK9:       .split:
 // CHECK9-NEXT:    br label [[TMP3]]
@@ -4431,20 +4449,20 @@ for (int i = 0; i < argc; ++i) {
 // CHECK10-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[P_STRIDE:%.*]] = alloca i32, align 4
-// CHECK10-NEXT:    [[P_LASTITER26:%.*]] = alloca i32, align 4
-// CHECK10-NEXT:    [[P_LOWERBOUND27:%.*]] = alloca i32, align 4
-// CHECK10-NEXT:    [[P_UPPERBOUND28:%.*]] = alloca i32, align 4
-// CHECK10-NEXT:    [[P_STRIDE29:%.*]] = alloca i32, align 4
+// CHECK10-NEXT:    [[P_LASTITER27:%.*]] = alloca i32, align 4
+// CHECK10-NEXT:    [[P_LOWERBOUND28:%.*]] = alloca i32, align 4
+// CHECK10-NEXT:    [[P_UPPERBOUND29:%.*]] = alloca i32, align 4
+// CHECK10-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK10-NEXT:    [[DOTCAPTURE_EXPR_33:%.*]] = alloca i32, align 4
+// CHECK10-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK10-NEXT:    [[I35:%.*]] = alloca i32, align 4
+// CHECK10-NEXT:    [[I36:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
 // CHECK10-NEXT:    [[R:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    store i32 0, i32* [[RETVAL]], align 4
@@ -4463,8 +4481,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK10-NEXT:    store i32 0, i32* [[P_LOWERBOUND]], align 4
 // CHECK10-NEXT:    store i32 0, i32* [[P_UPPERBOUND]], align 4
 // CHECK10-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
 // CHECK10-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
 // CHECK10-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
 // CHECK10-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
@@ -4487,75 +4505,75 @@ for (int i = 0; i < argc; ++i) {
 // CHECK10-NEXT:    [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
 // CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_HEADER]]
 // CHECK10:       omp_section_loop.exit:
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK10-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]])
 // CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_AFTER:%.*]]
 // CHECK10:       omp_section_loop.after:
 // CHECK10-NEXT:    br label [[OMP_SECTIONS_END:%.*]]
 // CHECK10:       omp_sections.end:
-// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER12:%.*]]
-// CHECK10:       omp_section_loop.preheader12:
-// CHECK10-NEXT:    store i32 0, i32* [[P_LOWERBOUND27]], align 4
-// CHECK10-NEXT:    store i32 1, i32* [[P_UPPERBOUND28]], align 4
-// CHECK10-NEXT:    store i32 1, i32* [[P_STRIDE29]], align 4
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM30:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]], i32 34, i32* [[P_LASTITER26]], i32* [[P_LOWERBOUND27]], i32* [[P_UPPERBOUND28]], i32* [[P_STRIDE29]], i32 1, i32 1)
-// CHECK10-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND27]], align 4
-// CHECK10-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND28]], align 4
+// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]]
+// CHECK10:       omp_section_loop.preheader13:
+// CHECK10-NEXT:    store i32 0, i32* [[P_LOWERBOUND28]], align 4
+// CHECK10-NEXT:    store i32 1, i32* [[P_UPPERBOUND29]], align 4
+// CHECK10-NEXT:    store i32 1, i32* [[P_STRIDE30]], align 4
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]], i32 34, i32* [[P_LASTITER27]], i32* [[P_LOWERBOUND28]], i32* [[P_UPPERBOUND29]], i32* [[P_STRIDE30]], i32 1, i32 1)
+// CHECK10-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND28]], align 4
+// CHECK10-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND29]], align 4
 // CHECK10-NEXT:    [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
 // CHECK10-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 1
-// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_HEADER13:%.*]]
-// CHECK10:       omp_section_loop.header13:
-// CHECK10-NEXT:    [[OMP_SECTION_LOOP_IV19:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER12]] ], [ [[OMP_SECTION_LOOP_NEXT21:%.*]], [[OMP_SECTION_LOOP_INC16:%.*]] ]
-// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_COND14:%.*]]
-// CHECK10:       omp_section_loop.cond14:
-// CHECK10-NEXT:    [[OMP_SECTION_LOOP_CMP20:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV19]], [[TMP10]]
-// CHECK10-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP20]], label [[OMP_SECTION_LOOP_BODY15:%.*]], label [[OMP_SECTION_LOOP_EXIT17:%.*]]
-// CHECK10:       omp_section_loop.body15:
-// CHECK10-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV19]], [[TMP7]]
+// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14:%.*]]
+// CHECK10:       omp_section_loop.header14:
+// CHECK10-NEXT:    [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ]
+// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_COND15:%.*]]
+// CHECK10:       omp_section_loop.cond15:
+// CHECK10-NEXT:    [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP10]]
+// CHECK10-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]]
+// CHECK10:       omp_section_loop.body16:
+// CHECK10-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP7]]
 // CHECK10-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], 1
 // CHECK10-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], 0
-// CHECK10-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC16]] [
-// CHECK10-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE22:%.*]]
-// CHECK10-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]]
+// CHECK10-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC17]] [
+// CHECK10-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]]
+// CHECK10-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]]
 // CHECK10-NEXT:    ]
-// CHECK10:       omp_section_loop.inc16:
-// CHECK10-NEXT:    [[OMP_SECTION_LOOP_NEXT21]] = add nuw i32 [[OMP_SECTION_LOOP_IV19]], 1
-// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_HEADER13]]
-// CHECK10:       omp_section_loop.exit17:
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]])
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
-// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_AFTER18:%.*]]
-// CHECK10:       omp_section_loop.after18:
-// CHECK10-NEXT:    br label [[OMP_SECTIONS_END32:%.*]]
-// CHECK10:       omp_sections.end32:
+// CHECK10:       omp_section_loop.inc17:
+// CHECK10-NEXT:    [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1
+// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_HEADER14]]
+// CHECK10:       omp_section_loop.exit18:
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK10-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM32]])
+// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_AFTER19:%.*]]
+// CHECK10:       omp_section_loop.after19:
+// CHECK10-NEXT:    br label [[OMP_SECTIONS_END33:%.*]]
+// CHECK10:       omp_sections.end33:
 // CHECK10-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK10-NEXT:    store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK10-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK10-NEXT:    [[SUB34:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK10-NEXT:    store i32 [[SUB34]], i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK10-NEXT:    [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK10-NEXT:    store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK10-NEXT:    store i32 0, i32* [[I]], align 4
 // CHECK10-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK10-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP16]]
 // CHECK10-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
 // CHECK10:       omp.precond.then:
 // CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
-// CHECK10-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK10-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK10-NEXT:    store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4
 // CHECK10-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK10-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM36:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
-// CHECK10-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM36]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK10-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-// CHECK10-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
-// CHECK10-NEXT:    [[CMP37:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
-// CHECK10-NEXT:    br i1 [[CMP37]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK10-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
+// CHECK10-NEXT:    [[CMP38:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
+// CHECK10-NEXT:    br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK10:       cond.true:
-// CHECK10-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// CHECK10-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
 // CHECK10-NEXT:    br label [[COND_END:%.*]]
 // CHECK10:       cond.false:
 // CHECK10-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
@@ -4569,50 +4587,50 @@ for (int i = 0; i < argc; ++i) {
 // CHECK10:       omp.inner.for.cond:
 // CHECK10-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK10-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-// CHECK10-NEXT:    [[CMP38:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
-// CHECK10-NEXT:    br i1 [[CMP38]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK10-NEXT:    [[CMP39:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
+// CHECK10-NEXT:    br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK10:       omp.inner.for.body:
 // CHECK10-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
-// CHECK10-NEXT:    [[ADD39:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK10-NEXT:    store i32 [[ADD39]], i32* [[I35]], align 4
+// CHECK10-NEXT:    [[ADD40:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK10-NEXT:    store i32 [[ADD40]], i32* [[I36]], align 4
 // CHECK10-NEXT:    [[TMP26:%.*]] = load float, float* @flag, align 4
-// CHECK10-NEXT:    [[TOBOOL40:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
-// CHECK10-NEXT:    br i1 [[TOBOOL40]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK10-NEXT:    [[TOBOOL41:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
+// CHECK10-NEXT:    br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK10:       omp_if.then:
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
-// CHECK10-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]], i32 2)
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
+// CHECK10-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2)
 // CHECK10-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
 // CHECK10-NEXT:    br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK10:       .cancel.exit:
 // CHECK10-NEXT:    br label [[CANCEL_EXIT:%.*]]
 // CHECK10:       omp_section_loop.body.case:
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9]], i32 3)
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK10-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3)
 // CHECK10-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0
 // CHECK10-NEXT:    br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
 // CHECK10:       omp_section_loop.body.case.split:
 // CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
 // CHECK10:       omp_section_loop.body.case.cncl:
 // CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
-// CHECK10:       omp_section_loop.body.case22:
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM23:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]], i32 3)
+// CHECK10:       omp_section_loop.body.case23:
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3)
 // CHECK10-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE22_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE22_CNCL:%.*]]
-// CHECK10:       omp_section_loop.body.case22.split:
-// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK10:       omp_section_loop.body.case22.cncl:
-// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK10:       omp_section_loop.body.case24:
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3)
+// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]]
+// CHECK10:       omp_section_loop.body.case23.split:
+// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK10:       omp_section_loop.body.case23.cncl:
+// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK10:       omp_section_loop.body.case25:
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM26:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK10-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM26]], i32 3)
 // CHECK10-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0
-// CHECK10-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE24_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]]
-// CHECK10:       omp_section_loop.body.case24.split:
-// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
-// CHECK10:       omp_section_loop.body.case24.cncl:
-// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
+// CHECK10-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]]
+// CHECK10:       omp_section_loop.body.case25.split:
+// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
+// CHECK10:       omp_section_loop.body.case25.cncl:
+// CHECK10-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
 // CHECK10:       .cancel.continue:
 // CHECK10-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK10:       omp_if.else:
@@ -4623,30 +4641,30 @@ for (int i = 0; i < argc; ++i) {
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
-// CHECK10-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP35]], 1
-// CHECK10-NEXT:    store i32 [[ADD42]], i32* [[DOTOMP_IV]], align 4
+// CHECK10-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK10-NEXT:    store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK10:       omp.inner.for.end:
 // CHECK10-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK10:       omp.loop.exit:
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
 // CHECK10-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK10:       cancel.exit:
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM43:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
-// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM43]])
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
+// CHECK10-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
 // CHECK10-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK10:       omp.precond.end:
 // CHECK10-NEXT:    br label [[CANCEL_CONT]]
 // CHECK10:       cancel.cont:
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
-// CHECK10-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM46]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK10-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]])
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
+// CHECK10-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
 // CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates*
 // CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
-// CHECK10-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i8* [[TMP36]])
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
+// CHECK10-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]])
 // CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
 // CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
 // CHECK10-NEXT:    store i32 0, i32* [[R]], align 4
@@ -4677,25 +4695,25 @@ for (int i = 0; i < argc; ++i) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
 // CHECK10-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK10-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
-// CHECK10-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX2]], align 1
-// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+// CHECK10-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
+// CHECK10-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
-// CHECK10-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL4:%.*]]
-// CHECK10:       .cncl4:
+// CHECK10-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
+// CHECK10:       .cncl5:
 // CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 // CHECK10:       .cont:
 // CHECK10-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK10-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK10-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
-// CHECK10-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8
-// CHECK10-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
-// CHECK10-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1
-// CHECK10-NEXT:    [[CONV7:%.*]] = sext i8 [[TMP12]] to i32
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV7]], [[TMP9]]
-// CHECK10-NEXT:    [[CONV8:%.*]] = trunc i32 [[ADD]] to i8
-// CHECK10-NEXT:    store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1
+// CHECK10-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
+// CHECK10-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
+// CHECK10-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
+// CHECK10-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
+// CHECK10-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
+// CHECK10-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
+// CHECK10-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
 // CHECK10-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK10:       omp.par.pre_finalize:
 // CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
@@ -4705,6 +4723,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK10-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
 // CHECK10-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
 // CHECK10:       .cncl:
+// CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK10:       .split:
 // CHECK10-NEXT:    br label [[TMP3]]
@@ -5045,4 +5065,3 @@ for (int i = 0; i < argc; ++i) {
 // CHECK10-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
 // CHECK10-NEXT:    ret void
 //
-//
\ No newline at end of file

diff  --git a/clang/test/OpenMP/cancellation_point_codegen.cpp b/clang/test/OpenMP/cancellation_point_codegen.cpp
index 079d9d3db9765..6b08a096a6b9e 100644
--- a/clang/test/OpenMP/cancellation_point_codegen.cpp
+++ b/clang/test/OpenMP/cancellation_point_codegen.cpp
@@ -117,7 +117,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK1-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
 // CHECK1-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
@@ -157,15 +157,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK1:       cancel.cont:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_3]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_4]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_5]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_6]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_6]], i32* [[DOTOMP_SECTIONS_LB_3]], i32* [[DOTOMP_SECTIONS_UB_4]], i32* [[DOTOMP_SECTIONS_ST_5]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_6]], i32* [[DOTOMP_SECTIONS_LB_3]], i32* [[DOTOMP_SECTIONS_UB_4]], i32* [[DOTOMP_SECTIONS_ST_5]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_4]], align 4
 // CHECK1-NEXT:    [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1
 // CHECK1-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 1
@@ -191,7 +191,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1:       .cancel.exit12:
 // CHECK1-NEXT:    br label [[CANCEL_EXIT23:%.*]]
 // CHECK1:       cancel.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT]]
 // CHECK1:       .cancel.continue13:
 // CHECK1-NEXT:    br label [[DOTOMP_SECTIONS_EXIT19]]
@@ -217,10 +217,10 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 [[INC21]], i32* [[DOTOMP_SECTIONS_IV_7]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND8]]
 // CHECK1:       omp.inner.for.end22:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT24:%.*]]
 // CHECK1:       cancel.cont24:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP27:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK1-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
@@ -238,7 +238,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 [[TMP30]], i32* [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK1-NEXT:    [[CMP29:%.*]] = icmp sgt i32 [[TMP31]], [[TMP32]]
@@ -271,7 +271,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1:       .cancel.exit33:
 // CHECK1-NEXT:    br label [[CANCEL_EXIT40:%.*]]
 // CHECK1:       cancel.exit23:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT24]]
 // CHECK1:       .cancel.continue34:
 // CHECK1-NEXT:    [[TMP41:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
@@ -291,15 +291,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1:       omp.inner.for.end39:
 // CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1:       omp.loop.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       cancel.exit40:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT41:%.*]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    br label [[CANCEL_CONT41]]
 // CHECK1:       cancel.cont41:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]])
+// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP44:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
 // CHECK1-NEXT:    [[TMP45:%.*]] = bitcast i8* [[TMP44]] to %struct.kmp_task_t_with_privates*
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP45]], i32 0, i32 0
@@ -334,20 +334,22 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
 // CHECK1-NEXT:    br i1 [[TMP5]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK1:       .cancel.exit:
+// CHECK1-NEXT:    [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    br label [[RETURN:%.*]]
 // CHECK1:       .cancel.continue:
-// CHECK1-NEXT:    [[TMP6:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1)
-// CHECK1-NEXT:    [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
-// CHECK1-NEXT:    br i1 [[TMP7]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]]
+// CHECK1-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1)
+// CHECK1-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+// CHECK1-NEXT:    br i1 [[TMP8]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]]
 // CHECK1:       .cancel.exit1:
+// CHECK1-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]])
 // CHECK1-NEXT:    br label [[RETURN]]
 // CHECK1:       .cancel.continue2:
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP1]], align 4
-// CHECK1-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP8]] to i8
-// CHECK1-NEXT:    [[TMP9:%.*]] = load i8**, i8*** [[TMP0]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0
-// CHECK1-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP10]] to i8
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
 // CHECK1-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
 // CHECK1-NEXT:    br label [[RETURN]]
 // CHECK1:       return:
@@ -475,7 +477,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
@@ -515,12 +517,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK1:       cancel.cont:
 // CHECK1-NEXT:    ret void
 // CHECK1:       cancel.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -542,7 +544,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK1-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
 // CHECK1-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
@@ -591,12 +593,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK1:       cancel.cont:
 // CHECK1-NEXT:    ret void
 // CHECK1:       cancel.exit:
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -639,7 +641,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
@@ -695,12 +697,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK1:       omp.loop.exit:
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP25]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP25]])
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       cancel.exit:
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4
-// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP27]])
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP27]])
 // CHECK1-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK1:       omp.precond.end:
 // CHECK1-NEXT:    br label [[CANCEL_CONT]]
@@ -745,7 +747,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK2-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
 // CHECK2-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
@@ -785,15 +787,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK2:       cancel.cont:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_3]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_4]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_5]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_6]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_6]], i32* [[DOTOMP_SECTIONS_LB_3]], i32* [[DOTOMP_SECTIONS_UB_4]], i32* [[DOTOMP_SECTIONS_ST_5]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_6]], i32* [[DOTOMP_SECTIONS_LB_3]], i32* [[DOTOMP_SECTIONS_UB_4]], i32* [[DOTOMP_SECTIONS_ST_5]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_4]], align 4
 // CHECK2-NEXT:    [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1
 // CHECK2-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 1
@@ -819,7 +821,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2:       .cancel.exit12:
 // CHECK2-NEXT:    br label [[CANCEL_EXIT23:%.*]]
 // CHECK2:       cancel.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT]]
 // CHECK2:       .cancel.continue13:
 // CHECK2-NEXT:    br label [[DOTOMP_SECTIONS_EXIT19]]
@@ -845,10 +847,10 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 [[INC21]], i32* [[DOTOMP_SECTIONS_IV_7]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND8]]
 // CHECK2:       omp.inner.for.end22:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT24:%.*]]
 // CHECK2:       cancel.cont24:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
 // CHECK2-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
@@ -866,7 +868,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 [[TMP30]], i32* [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK2-NEXT:    [[CMP29:%.*]] = icmp sgt i32 [[TMP31]], [[TMP32]]
@@ -899,7 +901,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2:       .cancel.exit33:
 // CHECK2-NEXT:    br label [[CANCEL_EXIT40:%.*]]
 // CHECK2:       cancel.exit23:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT24]]
 // CHECK2:       .cancel.continue34:
 // CHECK2-NEXT:    [[TMP41:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
@@ -919,15 +921,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2:       omp.inner.for.end39:
 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK2:       omp.loop.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       cancel.exit40:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT41:%.*]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    br label [[CANCEL_CONT41]]
 // CHECK2:       cancel.cont41:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]])
+// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP44:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
 // CHECK2-NEXT:    [[TMP45:%.*]] = bitcast i8* [[TMP44]] to %struct.kmp_task_t_with_privates*
 // CHECK2-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP45]], i32 0, i32 0
@@ -962,20 +964,22 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
 // CHECK2-NEXT:    br i1 [[TMP5]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK2:       .cancel.exit:
+// CHECK2-NEXT:    [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]])
 // CHECK2-NEXT:    br label [[RETURN:%.*]]
 // CHECK2:       .cancel.continue:
-// CHECK2-NEXT:    [[TMP6:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1)
-// CHECK2-NEXT:    [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
-// CHECK2-NEXT:    br i1 [[TMP7]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]]
+// CHECK2-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1)
+// CHECK2-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+// CHECK2-NEXT:    br i1 [[TMP8]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]]
 // CHECK2:       .cancel.exit1:
+// CHECK2-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]])
 // CHECK2-NEXT:    br label [[RETURN]]
 // CHECK2:       .cancel.continue2:
-// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP1]], align 4
-// CHECK2-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP8]] to i8
-// CHECK2-NEXT:    [[TMP9:%.*]] = load i8**, i8*** [[TMP0]], align 8
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0
-// CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4
+// CHECK2-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP10]] to i8
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// CHECK2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
 // CHECK2-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
 // CHECK2-NEXT:    br label [[RETURN]]
 // CHECK2:       return:
@@ -1103,7 +1107,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK2-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
 // CHECK2-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
@@ -1143,12 +1147,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK2:       cancel.cont:
 // CHECK2-NEXT:    ret void
 // CHECK2:       cancel.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -1170,7 +1174,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK2-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
 // CHECK2-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
@@ -1219,12 +1223,12 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK2:       cancel.cont:
 // CHECK2-NEXT:    ret void
 // CHECK2:       cancel.exit:
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT]]
 //
 //
@@ -1267,7 +1271,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
@@ -1323,16 +1327,15 @@ for (int i = 0; i < argc; ++i) {
 // CHECK2:       omp.loop.exit:
 // CHECK2-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP25]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP25]])
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       cancel.exit:
 // CHECK2-NEXT:    [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4
-// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP27]])
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP27]])
 // CHECK2-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK2:       omp.precond.end:
 // CHECK2-NEXT:    br label [[CANCEL_CONT]]
 // CHECK2:       cancel.cont:
 // CHECK2-NEXT:    ret void
 //
-//
\ No newline at end of file

diff  --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
index 7f830d47a7cf2..909b338a18e5f 100644
--- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -25226,4 +25226,4 @@ int main() {
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void
 //
-//
\ No newline at end of file
+//

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp
index a3275974ee87f..c1c6300db54e6 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp
@@ -1901,4 +1901,3 @@ struct S {
 // CHECK2-NEXT:    call void @__cxx_global_var_init()
 // CHECK2-NEXT:    ret void
 //
-//
\ No newline at end of file

diff  --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp
index c60328b18d5b6..e951059304604 100644
--- a/clang/test/OpenMP/target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_codegen.cpp
@@ -654,6 +654,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK1-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK1:       .cancel.exit:
+// CHECK1-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK1-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK1:       .cancel.continue:
 // CHECK1-NEXT:    ret void
@@ -1683,6 +1684,7 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK2-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK2:       .cancel.exit:
+// CHECK2-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK2-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK2:       .cancel.continue:
 // CHECK2-NEXT:    ret void
@@ -2706,6 +2708,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK3-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK3:       .cancel.exit:
+// CHECK3-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK3-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK3:       .cancel.continue:
 // CHECK3-NEXT:    ret void
@@ -3711,6 +3714,7 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK4-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK4:       .cancel.exit:
+// CHECK4-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK4-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK4:       .cancel.continue:
 // CHECK4-NEXT:    ret void
@@ -4412,6 +4416,7 @@ int bar(int n){
 // CHECK9-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK9-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK9:       .cancel.exit:
+// CHECK9-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK9-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK9:       .cancel.continue:
 // CHECK9-NEXT:    ret void
@@ -4825,6 +4830,7 @@ int bar(int n){
 // CHECK10-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK10-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK10:       .cancel.exit:
+// CHECK10-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK10-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK10:       .cancel.continue:
 // CHECK10-NEXT:    ret void
@@ -5238,6 +5244,7 @@ int bar(int n){
 // CHECK11-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK11-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK11:       .cancel.exit:
+// CHECK11-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK11-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK11:       .cancel.continue:
 // CHECK11-NEXT:    ret void
@@ -5636,6 +5643,7 @@ int bar(int n){
 // CHECK12-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK12-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK12:       .cancel.exit:
+// CHECK12-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK12-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK12:       .cancel.continue:
 // CHECK12-NEXT:    ret void
@@ -6344,6 +6352,7 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK17-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK17:       .cancel.exit:
+// CHECK17-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK17-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK17:       .cancel.continue:
 // CHECK17-NEXT:    ret void
@@ -7373,6 +7382,7 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK18-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK18:       .cancel.exit:
+// CHECK18-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK18-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK18:       .cancel.continue:
 // CHECK18-NEXT:    ret void
@@ -8396,6 +8406,7 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK19-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK19:       .cancel.exit:
+// CHECK19-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK19-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK19:       .cancel.continue:
 // CHECK19-NEXT:    ret void
@@ -9401,6 +9412,7 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK20-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK20:       .cancel.exit:
+// CHECK20-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK20-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK20:       .cancel.continue:
 // CHECK20-NEXT:    ret void
@@ -10102,6 +10114,7 @@ int bar(int n){
 // CHECK25-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK25-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK25:       .cancel.exit:
+// CHECK25-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK25-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK25:       .cancel.continue:
 // CHECK25-NEXT:    ret void
@@ -10515,6 +10528,7 @@ int bar(int n){
 // CHECK26-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK26-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK26:       .cancel.exit:
+// CHECK26-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK26-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK26:       .cancel.continue:
 // CHECK26-NEXT:    ret void
@@ -10928,6 +10942,7 @@ int bar(int n){
 // CHECK27-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK27-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK27:       .cancel.exit:
+// CHECK27-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK27-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK27:       .cancel.continue:
 // CHECK27-NEXT:    ret void
@@ -11326,6 +11341,7 @@ int bar(int n){
 // CHECK28-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 // CHECK28-NEXT:    br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
 // CHECK28:       .cancel.exit:
+// CHECK28-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK28-NEXT:    br label [[DOTCANCEL_CONTINUE]]
 // CHECK28:       .cancel.continue:
 // CHECK28-NEXT:    ret void
@@ -11670,4 +11686,4 @@ int bar(int n){
 // CHECK28-NEXT:    store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4
 // CHECK28-NEXT:    ret void
 //
-//
\ No newline at end of file
+//

diff  --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp
index d05a2a898fbbc..1c798f0885c82 100644
--- a/clang/test/OpenMP/target_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp
@@ -19747,4 +19747,4 @@ int bar(int n){
 // CHECK28-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
 // CHECK28-NEXT:    ret void
 //
-//
\ No newline at end of file
+//

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
index b815242d2fc51..1b0bd4615118e 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
@@ -4832,4 +4832,3 @@ int target_teams_fun(int *g){
 // CHECK13:       omp.precond.end:
 // CHECK13-NEXT:    ret void
 //
-//
\ No newline at end of file

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
index 355d29153de68..c31965a4c6457 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
@@ -6526,4 +6526,3 @@ int main (int argc, char **argv) {
 // CHECK28-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK28-NEXT:    ret void
 //
-//
\ No newline at end of file

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 2eeeef600b84c..0a249b3e25749 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -531,8 +531,10 @@ class OpenMPIRBuilder {
   ///
   /// \param CancelFlag Flag indicating if the cancellation is performed.
   /// \param CanceledDirective The kind of directive that is cancled.
+  /// \param ExitCB Extra code to be generated in the exit block.
   void emitCancelationCheckImpl(Value *CancelFlag,
-                                omp::Directive CanceledDirective);
+                                omp::Directive CanceledDirective,
+                                FinalizeCallbackTy ExitCB = {});
 
   /// Generate a barrier runtime call.
   ///

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 43c6dd9ab9972..1020de5f30ee9 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -417,9 +417,18 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
   Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
   Value *Result = Builder.CreateCall(
       getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
+  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
+    if (CanceledDirective == OMPD_parallel) {
+      IRBuilder<>::InsertPointGuard IPG(Builder);
+      Builder.restoreIP(IP);
+      createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+                    omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
+                    /* CheckCancelFlag */ false);
+    }
+  };
 
   // The actual cancel logic is shared with others, e.g., cancel_barriers.
-  emitCancelationCheckImpl(Result, CanceledDirective);
+  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
 
   // Update the insertion point and remove the terminator we introduced.
   Builder.SetInsertPoint(UI->getParent());
@@ -428,8 +437,9 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
   return Builder.saveIP();
 }
 
-void OpenMPIRBuilder::emitCancelationCheckImpl(
-    Value *CancelFlag, omp::Directive CanceledDirective) {
+void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
+                                               omp::Directive CanceledDirective,
+                                               FinalizeCallbackTy ExitCB) {
   assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
          "Unexpected cancellation!");
 
@@ -457,6 +467,8 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(
   // From the cancellation block we finalize all variables and go to the
   // post finalization block that is known to the FiniCB callback.
   Builder.SetInsertPoint(CancellationBlock);
+  if (ExitCB)
+    ExitCB(Builder.saveIP());
   auto &FI = FinalizationStack.back();
   FI.FiniCB(Builder.saveIP());
 

diff  --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index c9e4bed4154be..0df6fe531e3c1 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -231,7 +231,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) {
   auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel);
   Builder.restoreIP(NewIP);
   EXPECT_FALSE(M->global_empty());
-  EXPECT_EQ(M->size(), 3U);
+  EXPECT_EQ(M->size(), 4U);
   EXPECT_EQ(F->size(), 4U);
   EXPECT_EQ(BB->size(), 4U);
 
@@ -252,7 +252,20 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) {
   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
   EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock());
-  EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 1U);
+  EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
+  CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
+  EXPECT_NE(GTID1, nullptr);
+  EXPECT_EQ(GTID1->getNumArgOperands(), 1U);
+  EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
+  EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
+  EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
+  CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
+  EXPECT_NE(Barrier, nullptr);
+  EXPECT_EQ(Barrier->getNumArgOperands(), 2U);
+  EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
+  EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
+  EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
+  EXPECT_EQ(Barrier->getNumUses(), 0U);
   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
             1U);
   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
@@ -286,7 +299,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
   auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel);
   Builder.restoreIP(NewIP);
   EXPECT_FALSE(M->global_empty());
-  EXPECT_EQ(M->size(), 3U);
+  EXPECT_EQ(M->size(), 4U);
   EXPECT_EQ(F->size(), 7U);
   EXPECT_EQ(BB->size(), 1U);
   ASSERT_TRUE(isa<BranchInst>(BB->getTerminator()));
@@ -313,7 +326,20 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
   EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U);
   EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), NewIP.getBlock());
-  EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 1U);
+  EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
+  CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
+  EXPECT_NE(GTID1, nullptr);
+  EXPECT_EQ(GTID1->getNumArgOperands(), 1U);
+  EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
+  EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
+  EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
+  CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
+  EXPECT_NE(Barrier, nullptr);
+  EXPECT_EQ(Barrier->getNumArgOperands(), 2U);
+  EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
+  EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
+  EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
+  EXPECT_EQ(Barrier->getNumUses(), 0U);
   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
             1U);
   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),


        


More information about the llvm-commits mailing list