[flang-commits] [clang] [flang] [llvm] [mlir] [mlir][OpenMP] fix crash outlining infinite loop (PR #129872)

Tom Eccles via flang-commits flang-commits at lists.llvm.org
Wed Mar 5 09:00:18 PST 2025


https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/129872

>From 3055b89b8ab937bfd4ba70d87e1733b05156d0ac Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 4 Mar 2025 17:19:53 +0000
Subject: [PATCH 1/2] [mlir][OpenMP] fix crash outlining infinite loop

Previously an extra block was created by splitting the previous exit
block. This produced incorrect results when the outlined region
statically never terminated, because then there wouldn't be a valid
exit block for the outlined region. As a result, the newly added block
had an incoming edge from outside of the outlining region, which caused
outlining to fail.

So far as I can tell this extra block no longer serves any purpose. The
comment says it is supposed to collate multiple control flow edges into
one place, but the code as it is now does not achieve this. In fact, as
can be seen from the changes to lit tests, this block was not actually
outlined in the end. This is because there are actually two code
extractors: one in the callback for creating a parallel op which is used
to find what the input/output variables are (which does have this block
added to it), and another one which actually does the outlining (which
this block was not added to).

Tested with the gfortran and fujitsu test suites.
---
 .../parallel-private-reduction-worstcase.f90  |  2 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  8 ----
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |  4 +-
 .../LLVMIR/openmp-outline-infinite-loop.mlir  | 44 +++++++++++++++++++
 .../openmp-parallel-reduction-multiblock.mlir |  6 +--
 .../openmp-reduction-array-sections.mlir      |  2 +-
 .../LLVMIR/openmp-reduction-init-arg.mlir     |  6 +--
 .../LLVMIR/openmp-reduction-sections.mlir     |  2 +-
 8 files changed, 52 insertions(+), 22 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir

diff --git a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
index 7e735b6499504..cf77c46346b7f 100644
--- a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
+++ b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
@@ -219,5 +219,5 @@ subroutine worst_case(a, b, c, d)
 !                [var extent was non-zero: malloc a private array]
 ! CHECK:         br label %omp.private.init5
 
-! CHECK:       omp.par.outlined.exit.exitStub:                   ; preds = %omp.region.cont52
+! CHECK:       omp.par.exit.exitStub:                           ; preds = %omp.region.cont52
 ! CHECK-NEXT:    ret void
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index e34e93442ff85..0295319ec1e9d 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1602,14 +1602,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
   SmallVector<BasicBlock *, 32> Blocks;
   OI.collectBlocks(ParallelRegionBlockSet, Blocks);
 
-  // Ensure a single exit node for the outlined region by creating one.
-  // We might have multiple incoming edges to the exit now due to finalizations,
-  // e.g., cancel calls that cause the control flow to leave the region.
-  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
-  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
-  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
-  Blocks.push_back(PRegOutlinedExitBB);
-
   CodeExtractorAnalysisCache CEAC(*OuterFn);
   CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                           /* AggregateArgs */ false,
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index f25ba4aa3c8dc..e4114d491fc85 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -162,14 +162,12 @@ llvm.func @test_omp_parallel_if_1(%arg0: i32) -> () {
 // CHECK: %[[I32_IF_COND_VAR_1:.*]] = sext i1 %[[IF_COND_VAR_1]] to i32
 // CHECK: call void @__kmpc_fork_call_if(ptr @[[SI_VAR_IF_1]], i32 0, ptr @[[OMP_OUTLINED_FN_IF_1:.*]], i32 %[[I32_IF_COND_VAR_1]], ptr null)
 // CHECK: br label %[[OUTLINED_EXIT_IF_1:.*]]
-// CHECK: [[OUTLINED_EXIT_IF_1]]:
-// CHECK: br label %[[RETURN_BLOCK_IF_1:.*]]
   omp.parallel if(%1) {
     omp.barrier
     omp.terminator
   }
 
-// CHECK: [[RETURN_BLOCK_IF_1]]:
+// CHECK: [[OUTLINED_EXIT_IF_1]]:
 // CHECK: ret void
   llvm.return
 }
diff --git a/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir b/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir
new file mode 100644
index 0000000000000..faccfc678adfe
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir
@@ -0,0 +1,44 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Test that trying to outline an infinite loop doesn't lead to an assertion
+// failure.
+
+llvm.func @parallel_infinite_loop() -> () {
+  omp.parallel {
+    llvm.br ^bb1
+  ^bb1:
+    llvm.br ^bb1
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define void @parallel_infinite_loop() {
+// CHECK:         %[[VAL_2:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:         br label %[[VAL_3:.*]]
+// CHECK:       omp_parallel:
+// CHECK:         call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 0, ptr @parallel_infinite_loop..omp_par)
+// CHECK:         unreachable
+// CHECK:       omp.region.cont:                                  ; No predecessors!
+// CHECK:         br label %[[VAL_4:.*]]
+// CHECK:       omp.par.pre_finalize:                             ; preds = %[[VAL_5:.*]]
+// CHECK:         br label %[[VAL_6:.*]]
+// CHECK:       omp.par.exit:                                     ; preds = %[[VAL_4]]
+// CHECK:         ret void
+// CHECK:       }
+
+// CHECK-LABEL: define internal void @parallel_infinite_loop..omp_par(
+// CHECK-SAME:      ptr noalias %[[TID_ADDR:.*]], ptr noalias %[[ZERO_ADDR:.*]])
+// CHECK:       omp.par.entry:
+// CHECK:         %[[VAL_7:.*]] = alloca i32, align 4
+// CHECK:         %[[VAL_8:.*]] = load i32, ptr %[[VAL_9:.*]], align 4
+// CHECK:         store i32 %[[VAL_8]], ptr %[[VAL_7]], align 4
+// CHECK:         %[[VAL_10:.*]] = load i32, ptr %[[VAL_7]], align 4
+// CHECK:         br label %[[VAL_11:.*]]
+// CHECK:       omp.region.after_alloca:                          ; preds = %[[VAL_12:.*]]
+// CHECK:         br label %[[VAL_13:.*]]
+// CHECK:       omp.par.region:                                   ; preds = %[[VAL_11]]
+// CHECK:         br label %[[VAL_14:.*]]
+// CHECK:       omp.par.region1:                                  ; preds = %[[VAL_13]]
+// CHECK:         br label %[[VAL_15:.*]]
+// CHECK:       omp.par.region2:                                  ; preds = %[[VAL_15]], %[[VAL_14]]
+// CHECK:         br label %[[VAL_15]]
diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
index d2e394b2cf6a8..887d2977e45cc 100644
--- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
@@ -40,9 +40,7 @@ llvm.func @missordered_blocks_(%arg0: !llvm.ptr {fir.bindc_name = "x"}, %arg1: !
 // CHECK:         store ptr %[[VAL_8:.*]], ptr %[[VAL_7]], align 8
 // CHECK:         call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @missordered_blocks_..omp_par, ptr %[[VAL_0]])
 // CHECK:         br label %[[VAL_9:.*]]
-// CHECK:       omp.par.outlined.exit:                            ; preds = %[[VAL_4]]
-// CHECK:         br label %[[VAL_10:.*]]
-// CHECK:       omp.par.exit.split:                               ; preds = %[[VAL_9]]
+// CHECK:       omp.par.exit:                                     ; preds = %[[VAL_4]]
 // CHECK:         ret void
 // CHECK:       [[PAR_ENTRY:omp.par.entry]]:
 // CHECK:         %[[VAL_11:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_12:.*]], i32 0, i32 0
@@ -117,5 +115,5 @@ llvm.func @missordered_blocks_(%arg0: !llvm.ptr {fir.bindc_name = "x"}, %arg1: !
 // CHECK:         br label %[[VAL_38]]
 // CHECK:       omp.reduction.neutral1:                           ; preds = %[[VAL_25]]
 // CHECK:         br label %[[VAL_30]]
-// CHECK:       omp.par.outlined.exit.exitStub:                   ; preds = %[[VAL_53]]
+// CHECK:       omp.par.exit.exitStub:                            ; preds = %[[VAL_53]]
 // CHECK:         ret void
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
index d6ed3086969fb..b302b4b20edd5 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
@@ -219,5 +219,5 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
 // CHECK:       omp_section_loop.inc:                             ; preds = %[[VAL_69]]
 // CHECK:         %[[VAL_31]] = add nuw i32 %[[VAL_30]], 1
 // CHECK:         br label %[[VAL_28]]
-// CHECK:       omp.par.outlined.exit.exitStub:                   ; preds = %[[VAL_64]]
+// CHECK:       omp.par.exit.exitStub:                            ; preds = %[[VAL_64]]
 // CHECK:         ret void
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
index 8d329bd8ff817..a714ca68a1e95 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
@@ -46,9 +46,7 @@ module {
 // CHECK:         store ptr %[[VAL_2]], ptr %[[VAL_8]], align 8
 // CHECK:         call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @_QFPreduce..omp_par, ptr %[[VAL_0]])
 // CHECK:         br label %[[VAL_9:.*]]
-// CHECK:       omp.par.outlined.exit:                            ; preds = %[[VAL_6]]
-// CHECK:         br label %[[VAL_10:.*]]
-// CHECK:       omp.par.exit.split:                               ; preds = %[[VAL_9]]
+// CHECK:       omp.par.exit:                                     ; preds = %[[VAL_6]]
 // CHECK:         ret void
 // CHECK:       [[PAR_ENTRY:omp.par.entry]]:
 // CHECK:         %[[VAL_11:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_12:.*]], i32 0, i32 0
@@ -99,7 +97,7 @@ module {
 // CHECK:         br label %[[VAL_38:.*]]
 // CHECK:       omp.par.pre_finalize:                             ; preds = %[[VAL_33]]
 // CHECK:         br label %[[VAL_39:.*]]
-// CHECK:       omp.par.outlined.exit.exitStub:                   ; preds = %[[VAL_38]]
+// CHECK:       omp.par.exit.exitStub:                            ; preds = %[[VAL_38]]
 // CHECK:         ret void
 // CHECK:         %[[VAL_40:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_41:.*]], i64 0, i64 0
 // CHECK:         %[[VAL_42:.*]] = load ptr, ptr %[[VAL_40]], align 8
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir
index de3b997feb674..19da6f8517fcd 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir
@@ -144,7 +144,7 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.in
 // CHECK:       omp_section_loop.inc:                             ; preds = %[[VAL_59]]
 // CHECK:         %[[VAL_35]] = add nuw i32 %[[VAL_34]], 1
 // CHECK:         br label %[[VAL_32]]
-// CHECK:       omp.par.outlined.exit.exitStub:                   ; preds = %[[VAL_54]]
+// CHECK:       omp.par.exit.exitStub:                            ; preds = %[[VAL_54]]
 // CHECK:         ret void
 // CHECK:         %[[VAL_70:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_71:.*]], i64 0, i64 0
 // CHECK:         %[[VAL_72:.*]] = load ptr, ptr %[[VAL_70]], align 8

>From 76557a967a37bd4150e3e17c92d4d51d2a580ef9 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 5 Mar 2025 16:58:40 +0000
Subject: [PATCH 2/2] Update clang tests

---
 clang/test/OpenMP/cancel_codegen.cpp          |   6 +-
 .../irbuilder_nested_openmp_parallel_empty.c  |  18 +-
 .../OpenMP/irbuilder_nested_parallel_for.c    | 188 ++++++++----------
 clang/test/OpenMP/nested_loop_codegen.cpp     |  26 +--
 clang/test/OpenMP/parallel_codegen.cpp        |  24 +--
 clang/test/OpenMP/taskgroup_codegen.cpp       |   6 +-
 6 files changed, 108 insertions(+), 160 deletions(-)

diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp
index 7a95c0138b3a1..16e7542a8e826 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -728,9 +728,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    store ptr [[ARGV_ADDR]], ptr [[GEP_ARGV_ADDR]], align 8
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]])
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK3:       omp.par.outlined.exit:
-// CHECK3-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK3:       omp.par.exit.split:
+// CHECK3:       omp.par.exit:
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER:%.*]]
 // CHECK3:       omp_section_loop.preheader:
 // CHECK3-NEXT:    store i32 0, ptr [[P_LOWERBOUND]], align 4
@@ -998,7 +996,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK3:       .split:
 // CHECK3-NEXT:    br label [[TMP4]]
-// CHECK3:       omp.par.outlined.exit.exitStub:
+// CHECK3:       omp.par.exit.exitStub:
 // CHECK3-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
index 7bdb9749757cc..96962f71c709d 100644
--- a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
+++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
@@ -16,10 +16,10 @@
 // ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // ALL:       omp_parallel:
 // ALL-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z17nested_parallel_0v..omp_par.1)
-// ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT12:%.*]]
-// ALL:       omp.par.outlined.exit12:
+// ALL-NEXT:    br label [[OMP_PAR_EXIT:%.*]]
+// ALL:       omp.par.exit7:
 // ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// ALL:       omp.par.exit.split:
+// ALL:       omp.par.exit.exitStub:
 // ALL-NEXT:    ret void
 //
 void nested_parallel_0(void) {
@@ -50,10 +50,8 @@ void nested_parallel_0(void) {
 // ALL-NEXT:    [[GEP_R_ADDR17:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG14]], i32 0, i32 2
 // ALL-NEXT:    store ptr [[R_ADDR]], ptr [[GEP_R_ADDR17]], align 8
 // ALL-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z17nested_parallel_1Pfid..omp_par.2, ptr [[STRUCTARG14]])
-// ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT13:%.*]]
-// ALL:       omp.par.outlined.exit13:
-// ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// ALL:       omp.par.exit.split:
+// ALL-NEXT:    br label [[OMP_PAR_EXIT:%.*]]
+// ALL:       omp.par.exit:
 // ALL-NEXT:    ret void
 //
 void nested_parallel_1(float *r, int a, double b) {
@@ -85,10 +83,8 @@ void nested_parallel_1(float *r, int a, double b) {
 // ALL-NEXT:    [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
 // ALL-NEXT:    store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8
 // ALL-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z17nested_parallel_2Pfid..omp_par.5, ptr [[STRUCTARG]])
-// ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT55:%.*]]
-// ALL:       omp.par.outlined.exit55:
-// ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// ALL:       omp.par.exit.split:
+// ALL-NEXT:    br label [[OMP_PAR_EXIT:%.*]]
+// ALL:       omp.par.exit:
 // ALL-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 // ALL-NEXT:    [[CONV56:%.*]] = sitofp i32 [[TMP0]] to double
 // ALL-NEXT:    [[TMP1:%.*]] = load double, ptr [[B_ADDR]], align 8
diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c
index 68727e0503ede..ae3570fda412d 100644
--- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c
+++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c
@@ -78,10 +78,8 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK:       omp_parallel:
 // CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z14parallel_for_0v..omp_par)
-// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK:       omp.par.outlined.exit:
-// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK:       omp.par.exit.split:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT:%.*]]
+// CHECK:       omp.par.exit:
 // CHECK-NEXT:    ret void
 //
 //
@@ -147,7 +145,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK:       omp_loop.inc:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER]]
-// CHECK:       omp.par.outlined.exit.exitStub:
+// CHECK:       omp.par.exit.exitStub:
 // CHECK-NEXT:    ret void
 //
 //
@@ -231,10 +229,8 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[GEP_R_ADDR20:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 2
 // CHECK-NEXT:    store ptr [[R_ADDR]], ptr [[GEP_R_ADDR20]], align 8
 // CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]])
-// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
-// CHECK:       omp.par.outlined.exit16:
-// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK:       omp.par.exit.split:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT:%.*]]
+// CHECK:       omp.par.exit:
 // CHECK-NEXT:    ret void
 //
 //
@@ -264,16 +260,14 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
 // CHECK-NEXT:    store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8
 // CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]])
-// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK:       omp.par.outlined.exit:
-// CHECK-NEXT:    br label [[OMP_PAR_EXIT7_SPLIT:%.*]]
-// CHECK:       omp.par.exit7.split:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT:%.*]]
+// CHECK:       omp.par.exit7:
 // CHECK-NEXT:    br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]]
 // CHECK:       omp.par.region.parallel.after:
 // CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK:       omp.par.pre_finalize:
-// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]]
-// CHECK:       omp.par.outlined.exit16.exitStub:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT16_EXITSTUB:%.*]]
+// CHECK:       omp.par.exit.exitStub:
 // CHECK-NEXT:    ret void
 //
 //
@@ -352,7 +346,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK:       omp_loop.inc:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER]]
-// CHECK:       omp.par.outlined.exit.exitStub:
+// CHECK:       omp.par.exit7.exitStub:
 // CHECK-NEXT:    ret void
 //
 //
@@ -444,10 +438,8 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
 // CHECK-NEXT:    store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8
 // CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]])
-// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
-// CHECK:       omp.par.outlined.exit184:
-// CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK:       omp.par.exit.split:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT184:%.*]]
+// CHECK:       omp.par.exit:
 // CHECK-NEXT:    store i32 0, ptr [[I185]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0
 // CHECK-NEXT:    store ptr [[I185]], ptr [[TMP0]], align 8
@@ -457,7 +449,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]])
 // CHECK-NEXT:    [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER190:%.*]]
-// CHECK:       omp_loop.preheader190:
+// CHECK:       omp_loop.preheader187:
 // CHECK-NEXT:    store i32 0, ptr [[P_LOWERBOUND204]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4
@@ -469,13 +461,13 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]]
 // CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER191:%.*]]
-// CHECK:       omp_loop.header191:
+// CHECK:       omp_loop.header188:
 // CHECK-NEXT:    [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ]
 // CHECK-NEXT:    br label [[OMP_LOOP_COND192:%.*]]
-// CHECK:       omp_loop.cond192:
+// CHECK:       omp_loop.cond189:
 // CHECK-NEXT:    [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]]
 // CHECK-NEXT:    br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]]
-// CHECK:       omp_loop.body193:
+// CHECK:       omp_loop.body190:
 // CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]]
 // CHECK-NEXT:    call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]])
 // CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4
@@ -486,15 +478,15 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8
 // CHECK-NEXT:    store float [[CONV202]], ptr [[TMP11]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC194]]
-// CHECK:       omp_loop.inc194:
+// CHECK:       omp_loop.inc191:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER191]]
-// CHECK:       omp_loop.exit195:
+// CHECK:       omp_loop.exit192:
 // CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]])
 // CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM208]])
 // CHECK-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]]
-// CHECK:       omp_loop.after196:
+// CHECK:       omp_loop.after193:
 // CHECK-NEXT:    ret void
 //
 //
@@ -573,10 +565,8 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[GEP_R_ADDR217:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 2
 // CHECK-NEXT:    store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR217]], align 8
 // CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG214]])
-// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT159:%.*]]
-// CHECK:       omp.par.outlined.exit159:
-// CHECK-NEXT:    br label [[OMP_PAR_EXIT11_SPLIT:%.*]]
-// CHECK:       omp.par.exit11.split:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT159:%.*]]
+// CHECK:       omp.par.exit11:
 // CHECK-NEXT:    store i32 0, ptr [[I160]], align 4
 // CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0
 // CHECK-NEXT:    store ptr [[I160]], ptr [[TMP10]], align 8
@@ -586,7 +576,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]])
 // CHECK-NEXT:    [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER165:%.*]]
-// CHECK:       omp_loop.preheader165:
+// CHECK:       omp_loop.preheader163:
 // CHECK-NEXT:    store i32 0, ptr [[P_LOWERBOUND179]], align 4
 // CHECK-NEXT:    [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1
 // CHECK-NEXT:    store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4
@@ -598,24 +588,24 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]]
 // CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP16]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER166:%.*]]
-// CHECK:       omp_loop.header166:
+// CHECK:       omp_loop.header164:
 // CHECK-NEXT:    [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ]
 // CHECK-NEXT:    br label [[OMP_LOOP_COND167:%.*]]
-// CHECK:       omp_loop.cond167:
+// CHECK:       omp_loop.cond165:
 // CHECK-NEXT:    [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]]
 // CHECK-NEXT:    br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]]
-// CHECK:       omp_loop.exit170:
+// CHECK:       omp_loop.exit168:
 // CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM182]])
 // CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM183]])
 // CHECK-NEXT:    br label [[OMP_LOOP_AFTER171:%.*]]
-// CHECK:       omp_loop.after171:
+// CHECK:       omp_loop.after169:
 // CHECK-NEXT:    br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]]
 // CHECK:       omp.par.region.parallel.after:
 // CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK:       omp.par.pre_finalize:
 // CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]]
-// CHECK:       omp_loop.body168:
+// CHECK:       omp_loop.body166:
 // CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]]
 // CHECK-NEXT:    call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]])
 // CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4
@@ -626,7 +616,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8
 // CHECK-NEXT:    store float [[CONV177]], ptr [[TMP21]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC169]]
-// CHECK:       omp_loop.inc169:
+// CHECK:       omp_loop.inc167:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER166]]
 // CHECK:       omp_loop.body:
@@ -643,7 +633,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK:       omp_loop.inc:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER]]
-// CHECK:       omp.par.outlined.exit184.exitStub:
+// CHECK:       omp.par.exit.exitStub:
 // CHECK-NEXT:    ret void
 //
 //
@@ -731,10 +721,8 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
 // CHECK-NEXT:    store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8
 // CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]])
-// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK:       omp.par.outlined.exit:
-// CHECK-NEXT:    br label [[OMP_PAR_EXIT46_SPLIT:%.*]]
-// CHECK:       omp.par.exit46.split:
+// CHECK-NEXT:    br label [[OMP_PAR_EXIT:%.*]]
+// CHECK:       omp.par.exit46:
 // CHECK-NEXT:    store i32 0, ptr [[I75]], align 4
 // CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0
 // CHECK-NEXT:    store ptr [[I75]], ptr [[TMP10]], align 8
@@ -770,7 +758,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK:       omp_loop.after86:
 // CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK-NEXT:    br label [[OMP_PARALLEL213:%.*]]
-// CHECK:       omp_parallel213:
+// CHECK:       omp_parallel210:
 // CHECK-NEXT:    [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 0
 // CHECK-NEXT:    store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR210]], align 8
 // CHECK-NEXT:    [[GEP_B_ADDR211:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 1
@@ -779,9 +767,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR212]], align 8
 // CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG209]])
 // CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT134:%.*]]
-// CHECK:       omp.par.outlined.exit134:
-// CHECK-NEXT:    br label [[OMP_PAR_EXIT105_SPLIT:%.*]]
-// CHECK:       omp.par.exit105.split:
+// CHECK:       omp.par.exit105:
 // CHECK-NEXT:    store i32 0, ptr [[I135]], align 4
 // CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0
 // CHECK-NEXT:    store ptr [[I135]], ptr [[TMP18]], align 8
@@ -791,7 +777,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]])
 // CHECK-NEXT:    [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER140:%.*]]
-// CHECK:       omp_loop.preheader140:
+// CHECK:       omp_loop.preheader139:
 // CHECK-NEXT:    store i32 0, ptr [[P_LOWERBOUND154]], align 4
 // CHECK-NEXT:    [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1
 // CHECK-NEXT:    store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4
@@ -803,24 +789,24 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]]
 // CHECK-NEXT:    [[TMP25:%.*]] = add i32 [[TMP24]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER141:%.*]]
-// CHECK:       omp_loop.header141:
+// CHECK:       omp_loop.header140:
 // CHECK-NEXT:    [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ]
 // CHECK-NEXT:    br label [[OMP_LOOP_COND142:%.*]]
-// CHECK:       omp_loop.cond142:
+// CHECK:       omp_loop.cond141:
 // CHECK-NEXT:    [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], [[TMP25]]
 // CHECK-NEXT:    br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]]
-// CHECK:       omp_loop.exit145:
+// CHECK:       omp_loop.exit144:
 // CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM157]])
 // CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM158]])
 // CHECK-NEXT:    br label [[OMP_LOOP_AFTER146:%.*]]
-// CHECK:       omp_loop.after146:
+// CHECK:       omp_loop.after145:
 // CHECK-NEXT:    br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]]
 // CHECK:       omp.par.region9.parallel.after:
 // CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE10:%.*]]
 // CHECK:       omp.par.pre_finalize10:
 // CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]]
-// CHECK:       omp_loop.body143:
+// CHECK:       omp_loop.body142:
 // CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]]
 // CHECK-NEXT:    call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]])
 // CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4
@@ -831,7 +817,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8
 // CHECK-NEXT:    store float [[CONV152]], ptr [[TMP29]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC144]]
-// CHECK:       omp_loop.inc144:
+// CHECK:       omp_loop.inc143:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER141]]
 // CHECK:       omp_loop.body83:
@@ -862,7 +848,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK:       omp_loop.inc25:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER22]]
-// CHECK:       omp.par.outlined.exit159.exitStub:
+// CHECK:       omp.par.exit11.exitStub:
 // CHECK-NEXT:    ret void
 //
 //
@@ -941,7 +927,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK:       omp_loop.inc119:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT124]] = add nuw i32 [[OMP_LOOP_IV122]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER116]]
-// CHECK:       omp.par.outlined.exit134.exitStub:
+// CHECK:       omp.par.exit105.exitStub:
 // CHECK-NEXT:    ret void
 //
 //
@@ -1020,7 +1006,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK:       omp_loop.inc60:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT65]] = add nuw i32 [[OMP_LOOP_IV63]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER57]]
-// CHECK:       omp.par.outlined.exit.exitStub:
+// CHECK:       omp.par.exit46.exitStub:
 // CHECK-NEXT:    ret void
 //
 //
@@ -1512,9 +1498,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp_parallel:
 // CHECK-DEBUG-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z14parallel_for_0v..omp_par), !dbg [[DBG14:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK-DEBUG:       omp.par.outlined.exit:
-// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK-DEBUG:       omp.par.exit.split:
+// CHECK-DEBUG:       omp.par.exit:
 // CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG18:![0-9]+]]
 //
 //
@@ -1581,7 +1565,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp_loop.inc:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG27]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER]], !dbg [[DBG27]]
-// CHECK-DEBUG:       omp.par.outlined.exit.exitStub:
+// CHECK-DEBUG:       omp.par.exit.exitStub:
 // CHECK-DEBUG-NEXT:    ret void
 //
 //
@@ -1677,9 +1661,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    store ptr [[R_ADDR]], ptr [[GEP_R_ADDR20]], align 8
 // CHECK-DEBUG-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG82:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
-// CHECK-DEBUG:       omp.par.outlined.exit16:
-// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK-DEBUG:       omp.par.exit.split:
+// CHECK-DEBUG:       omp.par.exit:
 // CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG84:![0-9]+]]
 //
 //
@@ -1710,15 +1692,13 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8
 // CHECK-DEBUG-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG88:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK-DEBUG:       omp.par.outlined.exit:
-// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT7_SPLIT:%.*]]
-// CHECK-DEBUG:       omp.par.exit7.split:
+// CHECK-DEBUG:       omp.par.exit7:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG92:![0-9]+]]
 // CHECK-DEBUG:       omp.par.region.parallel.after:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK-DEBUG:       omp.par.pre_finalize:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG92]]
-// CHECK-DEBUG:       omp.par.outlined.exit16.exitStub:
+// CHECK-DEBUG:       omp.par.exit.exitStub:
 // CHECK-DEBUG-NEXT:    ret void
 //
 //
@@ -1798,7 +1778,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp_loop.inc:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG100]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER]], !dbg [[DBG100]]
-// CHECK-DEBUG:       omp.par.outlined.exit.exitStub:
+// CHECK-DEBUG:       omp.par.exit7.exitStub:
 // CHECK-DEBUG-NEXT:    ret void
 //
 //
@@ -1901,10 +1881,8 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2
 // CHECK-DEBUG-NEXT:    store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8
 // CHECK-DEBUG-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
-// CHECK-DEBUG:       omp.par.outlined.exit184:
-// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK-DEBUG:       omp.par.exit.split:
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT184:%.*]]
+// CHECK-DEBUG:       omp.par.exit:
 // CHECK-DEBUG-NEXT:      #dbg_declare(ptr [[I185]], [[META144:![0-9]+]], !DIExpression(), [[META147:![0-9]+]])
 // CHECK-DEBUG-NEXT:    store i32 0, ptr [[I185]], align 4, !dbg [[META147]]
 // CHECK-DEBUG-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]]
@@ -1915,7 +1893,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.preheader190:
+// CHECK-DEBUG:       omp_loop.preheader187:
 // CHECK-DEBUG-NEXT:    store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]]
@@ -1927,13 +1905,13 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.header191:
+// CHECK-DEBUG:       omp_loop.header188:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.cond192:
+// CHECK-DEBUG:       omp_loop.cond189:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.body193:
+// CHECK-DEBUG:       omp_loop.body190:
 // CHECK-DEBUG-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]]
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]]
@@ -1944,15 +1922,15 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC194]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.inc194:
+// CHECK-DEBUG:       omp_loop.inc191:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.exit195:
+// CHECK-DEBUG:       omp_loop.exit192:
 // CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]]
 // CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.after196:
+// CHECK-DEBUG:       omp_loop.after193:
 // CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG155:![0-9]+]]
 //
 //
@@ -2033,9 +2011,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR217]], align 8
 // CHECK-DEBUG-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG214]]), !dbg [[DBG166:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT159:%.*]]
-// CHECK-DEBUG:       omp.par.outlined.exit159:
-// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT11_SPLIT:%.*]]
-// CHECK-DEBUG:       omp.par.exit11.split:
+// CHECK-DEBUG:       omp.par.exit11:
 // CHECK-DEBUG-NEXT:      #dbg_declare(ptr [[I160]], [[META170:![0-9]+]], !DIExpression(), [[META173:![0-9]+]])
 // CHECK-DEBUG-NEXT:    store i32 0, ptr [[I160]], align 4, !dbg [[META173]]
 // CHECK-DEBUG-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0, !dbg [[DBG174:![0-9]+]]
@@ -2046,7 +2022,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]), !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4, !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER165:%.*]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.preheader165:
+// CHECK-DEBUG:       omp_loop.preheader163:
 // CHECK-DEBUG-NEXT:    store i32 0, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1, !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG174]]
@@ -2058,24 +2034,24 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER166:%.*]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.header166:
+// CHECK-DEBUG:       omp_loop.header164:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ], !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND167:%.*]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.cond167:
+// CHECK-DEBUG:       omp_loop.cond165:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]], !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.exit170:
+// CHECK-DEBUG:       omp_loop.exit168:
 // CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]]), !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG176:![0-9]+]]
 // CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM183]]), !dbg [[DBG176]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER171:%.*]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.after171:
+// CHECK-DEBUG:       omp_loop.after169:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG177:![0-9]+]]
 // CHECK-DEBUG:       omp.par.region.parallel.after:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK-DEBUG:       omp.par.pre_finalize:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]], !dbg [[DBG177]]
-// CHECK-DEBUG:       omp_loop.body168:
+// CHECK-DEBUG:       omp_loop.body166:
 // CHECK-DEBUG-NEXT:    [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]], !dbg [[DBG176]]
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]), !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]]
@@ -2086,7 +2062,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG180:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store float [[CONV177]], ptr [[TMP21]], align 4, !dbg [[DBG181:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC169]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.inc169:
+// CHECK-DEBUG:       omp_loop.inc167:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1, !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER166]], !dbg [[DBG174]]
 // CHECK-DEBUG:       omp_loop.body:
@@ -2103,7 +2079,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp_loop.inc:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG162]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER]], !dbg [[DBG162]]
-// CHECK-DEBUG:       omp.par.outlined.exit184.exitStub:
+// CHECK-DEBUG:       omp.par.exit.exitStub:
 // CHECK-DEBUG-NEXT:    ret void
 //
 //
@@ -2193,9 +2169,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8
 // CHECK-DEBUG-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG197:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK-DEBUG:       omp.par.outlined.exit:
-// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT46_SPLIT:%.*]]
-// CHECK-DEBUG:       omp.par.exit46.split:
+// CHECK-DEBUG:       omp.par.exit46:
 // CHECK-DEBUG-NEXT:      #dbg_declare(ptr [[I75]], [[META201:![0-9]+]], !DIExpression(), [[META204:![0-9]+]])
 // CHECK-DEBUG-NEXT:    store i32 0, ptr [[I75]], align 4, !dbg [[META204]]
 // CHECK-DEBUG-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0, !dbg [[DBG205:![0-9]+]]
@@ -2232,7 +2206,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp_loop.after86:
 // CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG208:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL213:%.*]]
-// CHECK-DEBUG:       omp_parallel213:
+// CHECK-DEBUG:       omp_parallel210:
 // CHECK-DEBUG-NEXT:    [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 0
 // CHECK-DEBUG-NEXT:    store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR210]], align 8
 // CHECK-DEBUG-NEXT:    [[GEP_B_ADDR211:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 1
@@ -2241,9 +2215,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR212]], align 8
 // CHECK-DEBUG-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG209]]), !dbg [[DBG209:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT134:%.*]]
-// CHECK-DEBUG:       omp.par.outlined.exit134:
-// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT105_SPLIT:%.*]]
-// CHECK-DEBUG:       omp.par.exit105.split:
+// CHECK-DEBUG:       omp.par.exit105:
 // CHECK-DEBUG-NEXT:      #dbg_declare(ptr [[I135]], [[META213:![0-9]+]], !DIExpression(), [[META216:![0-9]+]])
 // CHECK-DEBUG-NEXT:    store i32 0, ptr [[I135]], align 4, !dbg [[META216]]
 // CHECK-DEBUG-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0, !dbg [[DBG217:![0-9]+]]
@@ -2254,7 +2226,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]), !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4, !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER140:%.*]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.preheader140:
+// CHECK-DEBUG:       omp_loop.preheader139:
 // CHECK-DEBUG-NEXT:    store i32 0, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1, !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG217]]
@@ -2266,24 +2238,24 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER141:%.*]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.header141:
+// CHECK-DEBUG:       omp_loop.header140:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ], !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND142:%.*]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.cond142:
+// CHECK-DEBUG:       omp_loop.cond141:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], [[TMP25]], !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.exit145:
+// CHECK-DEBUG:       omp_loop.exit144:
 // CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]]), !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG219:![0-9]+]]
 // CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM158]]), !dbg [[DBG219]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER146:%.*]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.after146:
+// CHECK-DEBUG:       omp_loop.after145:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG220:![0-9]+]]
 // CHECK-DEBUG:       omp.par.region9.parallel.after:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_PRE_FINALIZE10:%.*]]
 // CHECK-DEBUG:       omp.par.pre_finalize10:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]], !dbg [[DBG220]]
-// CHECK-DEBUG:       omp_loop.body143:
+// CHECK-DEBUG:       omp_loop.body142:
 // CHECK-DEBUG-NEXT:    [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]], !dbg [[DBG219]]
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]), !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG221:![0-9]+]]
@@ -2294,7 +2266,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG223:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store float [[CONV152]], ptr [[TMP29]], align 4, !dbg [[DBG224:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC144]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.inc144:
+// CHECK-DEBUG:       omp_loop.inc143:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1, !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER141]], !dbg [[DBG217]]
 // CHECK-DEBUG:       omp_loop.body83:
@@ -2325,7 +2297,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp_loop.inc25:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG193]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER22]], !dbg [[DBG193]]
-// CHECK-DEBUG:       omp.par.outlined.exit159.exitStub:
+// CHECK-DEBUG:       omp.par.exit11.exitStub:
 // CHECK-DEBUG-NEXT:    ret void
 //
 //
@@ -2405,7 +2377,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp_loop.inc119:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT124]] = add nuw i32 [[OMP_LOOP_IV122]], 1, !dbg [[DBG241]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER116]], !dbg [[DBG241]]
-// CHECK-DEBUG:       omp.par.outlined.exit134.exitStub:
+// CHECK-DEBUG:       omp.par.exit105.exitStub:
 // CHECK-DEBUG-NEXT:    ret void
 //
 //
@@ -2485,7 +2457,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp_loop.inc60:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT65]] = add nuw i32 [[OMP_LOOP_IV63]], 1, !dbg [[DBG257]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER57]], !dbg [[DBG257]]
-// CHECK-DEBUG:       omp.par.outlined.exit.exitStub:
+// CHECK-DEBUG:       omp.par.exit46.exitStub:
 // CHECK-DEBUG-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/nested_loop_codegen.cpp b/clang/test/OpenMP/nested_loop_codegen.cpp
index a08950e2e55c7..d8fab26bf1e7f 100644
--- a/clang/test/OpenMP/nested_loop_codegen.cpp
+++ b/clang/test/OpenMP/nested_loop_codegen.cpp
@@ -530,10 +530,8 @@ int inline_decl() {
 // CHECK3-NEXT:    [[GEP_K:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
 // CHECK3-NEXT:    store ptr [[K]], ptr [[GEP_K]], align 8
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z12outline_declv..omp_par, ptr [[STRUCTARG]])
-// CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK3:       omp.par.outlined.exit:
-// CHECK3-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK3:       omp.par.exit.split:
+// CHECK3-NEXT:    br label [[OMP_PAR_EXIT:%.*]]
+// CHECK3:       omp.par.exit:
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[K]], align 4
 // CHECK3-NEXT:    ret i32 [[TMP0]]
 //
@@ -620,7 +618,7 @@ int inline_decl() {
 // CHECK3:       omp_loop.inc:
 // CHECK3-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
 // CHECK3-NEXT:    br label [[OMP_LOOP_HEADER]]
-// CHECK3:       omp.par.outlined.exit.exitStub:
+// CHECK3:       omp.par.exit.exitStub:
 // CHECK3-NEXT:    ret void
 //
 //
@@ -699,9 +697,7 @@ int inline_decl() {
 // CHECK3-NEXT:    store ptr [[RES]], ptr [[GEP_RES]], align 8
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z11inline_declv..omp_par, ptr [[STRUCTARG]])
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK3:       omp.par.outlined.exit:
-// CHECK3-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK3:       omp.par.exit.split:
+// CHECK3:       omp.par.exit:
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[RES]], align 4
 // CHECK3-NEXT:    ret i32 [[TMP0]]
 //
@@ -789,7 +785,7 @@ int inline_decl() {
 // CHECK3:       omp_loop.inc:
 // CHECK3-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
 // CHECK3-NEXT:    br label [[OMP_LOOP_HEADER]]
-// CHECK3:       omp.par.outlined.exit.exitStub:
+// CHECK3:       omp.par.exit.exitStub:
 // CHECK3-NEXT:    ret void
 //
 //
@@ -870,9 +866,7 @@ int inline_decl() {
 // CHECK4-NEXT:    store ptr [[K]], ptr [[GEP_K]], align 8
 // CHECK4-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z12outline_declv..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG18:![0-9]+]]
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK4:       omp.par.outlined.exit:
-// CHECK4-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK4:       omp.par.exit.split:
+// CHECK4:       omp.par.exit:
 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG20:![0-9]+]]
 // CHECK4-NEXT:    ret i32 [[TMP0]], !dbg [[DBG20]]
 //
@@ -959,7 +953,7 @@ int inline_decl() {
 // CHECK4:       omp_loop.inc:
 // CHECK4-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG28]]
 // CHECK4-NEXT:    br label [[OMP_LOOP_HEADER]], !dbg [[DBG28]]
-// CHECK4:       omp.par.outlined.exit.exitStub:
+// CHECK4:       omp.par.exit.exitStub:
 // CHECK4-NEXT:    ret void
 //
 //
@@ -1048,9 +1042,7 @@ int inline_decl() {
 // CHECK4-NEXT:    store ptr [[RES]], ptr [[GEP_RES]], align 8
 // CHECK4-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z11inline_declv..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG82:![0-9]+]]
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK4:       omp.par.outlined.exit:
-// CHECK4-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK4:       omp.par.exit.split:
+// CHECK4:       omp.par.exit:
 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG84:![0-9]+]]
 // CHECK4-NEXT:    ret i32 [[TMP0]], !dbg [[DBG84]]
 //
@@ -1139,7 +1131,7 @@ int inline_decl() {
 // CHECK4:       omp_loop.inc:
 // CHECK4-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[META95]]
 // CHECK4-NEXT:    br label [[OMP_LOOP_HEADER]], !dbg [[META95]]
-// CHECK4:       omp.par.outlined.exit.exitStub:
+// CHECK4:       omp.par.exit.exitStub:
 // CHECK4-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp
index 2a0a881b109be..c63c6f554f4ae 100644
--- a/clang/test/OpenMP/parallel_codegen.cpp
+++ b/clang/test/OpenMP/parallel_codegen.cpp
@@ -736,9 +736,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr [[VLA]], ptr [[GEP_VLA]], align 8
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]])
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK3:       omp.par.outlined.exit:
-// CHECK3-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK3:       omp.par.exit.split:
+// CHECK3:       omp.par.exit:
 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
 // CHECK3-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP3]])
 // CHECK3-NEXT:    store i32 [[CALL]], ptr [[RETVAL]], align 4
@@ -770,7 +768,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK3:       omp.par.pre_finalize:
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-// CHECK3:       omp.par.outlined.exit.exitStub:
+// CHECK3:       omp.par.exit.exitStub:
 // CHECK3-NEXT:    ret void
 //
 //
@@ -805,9 +803,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr [[ARGC_ADDR]], ptr [[GEP_ARGC_ADDR]], align 8
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z5tmainIPPcEiT_..omp_par, ptr [[STRUCTARG]])
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK3:       omp.par.outlined.exit:
-// CHECK3-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK3:       omp.par.exit.split:
+// CHECK3:       omp.par.exit:
 // CHECK3-NEXT:    ret i32 0
 //
 //
@@ -837,7 +833,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK3:       omp.par.pre_finalize:
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-// CHECK3:       omp.par.outlined.exit.exitStub:
+// CHECK3:       omp.par.exit.exitStub:
 // CHECK3-NEXT:    ret void
 //
 //
@@ -878,9 +874,7 @@ int main (int argc, char **argv) {
 // CHECK4-NEXT:    store ptr [[VLA]], ptr [[GEP_VLA]], align 8
 // CHECK4-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @main..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG30:![0-9]+]]
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK4:       omp.par.outlined.exit:
-// CHECK4-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK4:       omp.par.exit.split:
+// CHECK4:       omp.par.exit:
 // CHECK4-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG31:![0-9]+]]
 // CHECK4-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP3]]), !dbg [[DBG31]]
 // CHECK4-NEXT:    store i32 [[CALL]], ptr [[RETVAL]], align 4, !dbg [[DBG31]]
@@ -912,7 +906,7 @@ int main (int argc, char **argv) {
 // CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK4:       omp.par.pre_finalize:
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG35]]
-// CHECK4:       omp.par.outlined.exit.exitStub:
+// CHECK4:       omp.par.exit.exitStub:
 // CHECK4-NEXT:    ret void
 //
 //
@@ -949,9 +943,7 @@ int main (int argc, char **argv) {
 // CHECK4-NEXT:    store ptr [[ARGC_ADDR]], ptr [[GEP_ARGC_ADDR]], align 8
 // CHECK4-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @_Z5tmainIPPcEiT_..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG52:![0-9]+]]
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK4:       omp.par.outlined.exit:
-// CHECK4-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK4:       omp.par.exit.split:
+// CHECK4:       omp.par.exit:
 // CHECK4-NEXT:    ret i32 0, !dbg [[DBG54:![0-9]+]]
 //
 //
@@ -982,7 +974,7 @@ int main (int argc, char **argv) {
 // CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK4:       omp.par.pre_finalize:
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG66]]
-// CHECK4:       omp.par.outlined.exit.exitStub:
+// CHECK4:       omp.par.exit.exitStub:
 // CHECK4-NEXT:    ret void
 //
 //
diff --git a/clang/test/OpenMP/taskgroup_codegen.cpp b/clang/test/OpenMP/taskgroup_codegen.cpp
index 61105a6472e37..72653144d08dd 100644
--- a/clang/test/OpenMP/taskgroup_codegen.cpp
+++ b/clang/test/OpenMP/taskgroup_codegen.cpp
@@ -224,9 +224,7 @@ void parallel_taskgroup() {
 // CHECK2:       omp_parallel:
 // CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z18parallel_taskgroupv..omp_par)
 // CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
-// CHECK2:       omp.par.outlined.exit:
-// CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
-// CHECK2:       omp.par.exit.split:
+// CHECK2:       omp.par.exit:
 // CHECK2-NEXT:    ret void
 //
 //
@@ -250,6 +248,6 @@ void parallel_taskgroup() {
 // CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK2:       omp.par.pre_finalize:
 // CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-// CHECK2:       omp.par.outlined.exit.exitStub:
+// CHECK2:       omp.par.exit.exitStub:
 // CHECK2-NEXT:    ret void
 //



More information about the flang-commits mailing list