[llvm] 4c94715 - [Attributor] Set floating point loads and stores as nofree in AANoFreeFloating

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 9 13:16:56 PDT 2021


Author: Joseph Huber
Date: 2021-06-09T16:16:37-04:00
New Revision: 4c9471581fa07331b9ec8e4ab1ea312fba43c0b6

URL: https://github.com/llvm/llvm-project/commit/4c9471581fa07331b9ec8e4ab1ea312fba43c0b6
DIFF: https://github.com/llvm/llvm-project/commit/4c9471581fa07331b9ec8e4ab1ea312fba43c0b6.diff

LOG: [Attributor] Set floating point loads and stores as nofree in AANoFreeFloating

Summary:
The current implementation of AANoFreeFloating will incorrectly list floating
point loads and stores as may-free. This prevents other attributor instances
like HeapToStack from pushing some allocations to the stack.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D103975

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
    llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
    llvm/test/Transforms/Attributor/callbacks.ll
    llvm/test/Transforms/Attributor/heap_to_stack.ll
    llvm/test/Transforms/Attributor/memory_locations.ll
    llvm/test/Transforms/Attributor/nocapture-1.ll
    llvm/test/Transforms/Attributor/nofree.ll
    llvm/test/Transforms/Attributor/nonnull.ll
    llvm/test/Transforms/Attributor/norecurse.ll
    llvm/test/Transforms/Attributor/range.ll
    llvm/test/Transforms/Attributor/readattrs.ll
    llvm/test/Transforms/Attributor/value-simplify.ll
    llvm/test/Transforms/Attributor/willreturn.ll
    llvm/test/Transforms/OpenMP/parallel_deletion.ll
    llvm/test/Transforms/OpenMP/parallel_region_merging.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 78ff7a65c0185..ea627cac344e9 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1499,7 +1499,8 @@ struct AANoFreeFloating : AANoFreeImpl {
         Follow = true;
         return true;
       }
-      if (isa<ReturnInst>(UserI))
+      if (isa<StoreInst>(UserI) || isa<LoadInst>(UserI) ||
+          isa<ReturnInst>(UserI))
         return true;
 
       // Unknown user.

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
index 0155770bc6c2e..2fdaa8be8541a 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
@@ -8,7 +8,7 @@ define void @f() {
 ; IS________OPM-LABEL: define {{[^@]+}}@f() {
 ; IS________OPM-NEXT:  entry:
 ; IS________OPM-NEXT:    [[A:%.*]] = alloca i32, align 1
-; IS________OPM-NEXT:    call void @g(i32* noalias nocapture noundef nonnull readonly dereferenceable(4) [[A]])
+; IS________OPM-NEXT:    call void @g(i32* noalias nocapture nofree noundef nonnull readonly dereferenceable(4) [[A]])
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@f() {
@@ -26,7 +26,7 @@ entry:
 
 define internal void @g(i32* %a) {
 ; IS________OPM-LABEL: define {{[^@]+}}@g
-; IS________OPM-SAME: (i32* noalias nocapture noundef nonnull readonly dereferenceable(4) [[A:%.*]]) {
+; IS________OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly dereferenceable(4) [[A:%.*]]) {
 ; IS________OPM-NEXT:    [[AA:%.*]] = load i32, i32* [[A]], align 1
 ; IS________OPM-NEXT:    call void @z(i32 [[AA]])
 ; IS________OPM-NEXT:    ret void

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll
index 3e9dc2fa4dacd..5efd9ef20a591 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll
@@ -8,7 +8,7 @@ declare void @sink(i32)
 
 define internal void @test(i32** %X) !dbg !2 {
 ; CHECK-LABEL: define {{[^@]+}}@test
-; CHECK-SAME: (i32** nocapture noundef nonnull readonly align 8 dereferenceable(8) [[X:%.*]]) !dbg [[DBG3:![0-9]+]] {
+; CHECK-SAME: (i32** nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[X:%.*]]) !dbg [[DBG3:![0-9]+]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[X]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8
 ; CHECK-NEXT:    call void @sink(i32 [[TMP2]])
@@ -33,14 +33,14 @@ define internal void @test_byval(%struct.pair* byval(%struct.pair) %P) {
 
 define void @caller(i32** %Y, %struct.pair* %P) {
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@caller
-; IS__TUNIT____-SAME: (i32** nocapture readonly [[Y:%.*]], %struct.pair* nocapture nofree readnone [[P:%.*]]) {
-; IS__TUNIT____-NEXT:    call void @test(i32** nocapture readonly align 8 [[Y]]), !dbg [[DBG4:![0-9]+]]
+; IS__TUNIT____-SAME: (i32** nocapture nofree readonly [[Y:%.*]], %struct.pair* nocapture nofree readnone [[P:%.*]]) {
+; IS__TUNIT____-NEXT:    call void @test(i32** nocapture nofree readonly align 8 [[Y]]), !dbg [[DBG4:![0-9]+]]
 ; IS__TUNIT____-NEXT:    call void @test_byval(), !dbg [[DBG5:![0-9]+]]
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller
-; IS__CGSCC____-SAME: (i32** nocapture noundef nonnull readonly align 8 dereferenceable(8) [[Y:%.*]], %struct.pair* nocapture nofree readnone [[P:%.*]]) {
-; IS__CGSCC____-NEXT:    call void @test(i32** nocapture noundef nonnull readonly align 8 dereferenceable(8) [[Y]]), !dbg [[DBG4:![0-9]+]]
+; IS__CGSCC____-SAME: (i32** nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[Y:%.*]], %struct.pair* nocapture nofree readnone [[P:%.*]]) {
+; IS__CGSCC____-NEXT:    call void @test(i32** nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[Y]]), !dbg [[DBG4:![0-9]+]]
 ; IS__CGSCC____-NEXT:    call void @test_byval(), !dbg [[DBG5:![0-9]+]]
 ; IS__CGSCC____-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
index 4ade2347abe99..4b9b6577e7655 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
@@ -25,7 +25,7 @@ define void @run() {
 ; IS________OPM-LABEL: define {{[^@]+}}@run
 ; IS________OPM-SAME: () #[[ATTR0:[0-9]+]] {
 ; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[TMP0:%.*]] = call i64 @CaptureAStruct(%struct.Foo* nocapture nofree noundef nonnull readonly byval(%struct.Foo) align 8 dereferenceable(16) @a) #[[ATTR0]]
+; IS________OPM-NEXT:    [[TMP0:%.*]] = call i64 @CaptureAStruct(%struct.Foo* nocapture nofree noundef nonnull readonly byval([[STRUCT_FOO:%.*]]) align 8 dereferenceable(16) @a) #[[ATTR0]]
 ; IS________OPM-NEXT:    unreachable
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree noreturn nosync nounwind readnone

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
index 78e51fbe6c1ee..eb593ae7edbfc 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
@@ -11,7 +11,7 @@ define void @caller() #0 {
 ; IS________OPM-LABEL: define {{[^@]+}}@caller() {
 ; IS________OPM-NEXT:    [[X:%.*]] = alloca i32, align 4
 ; IS________OPM-NEXT:    store i32 42, i32* [[X]], align 4
-; IS________OPM-NEXT:    call void @promote_i32_ptr(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[X]]), !prof [[PROF0:![0-9]+]]
+; IS________OPM-NEXT:    call void @promote_i32_ptr(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X]]), !prof [[PROF0:![0-9]+]]
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@caller() {
@@ -29,7 +29,7 @@ define void @caller() #0 {
 
 define internal void @promote_i32_ptr(i32* %xp) {
 ; IS________OPM-LABEL: define {{[^@]+}}@promote_i32_ptr
-; IS________OPM-SAME: (i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) {
+; IS________OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) {
 ; IS________OPM-NEXT:    [[X:%.*]] = load i32, i32* [[XP]], align 4
 ; IS________OPM-NEXT:    call void @use_i32(i32 [[X]])
 ; IS________OPM-NEXT:    ret void

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
index 871a4a176d544..89ec34a64f7a4 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
@@ -52,19 +52,28 @@ define void @zed(%pair* byval(%pair) %Data) {
 ; IS__TUNIT_OPM-NEXT:    call void @bar(%pair* noalias nocapture nonnull readonly byval([[PAIR]]) dereferenceable(8) [[DATA]])
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
-; IS________NPM-LABEL: define {{[^@]+}}@zed
-; IS________NPM-SAME: (%pair* noalias nocapture nonnull readonly byval([[PAIR:%.*]]) dereferenceable(8) [[DATA:%.*]]) {
-; IS________NPM-NEXT:    [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32*
-; IS________NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]], align 1
-; IS________NPM-NEXT:    [[DATA_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
-; IS________NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]], align 1
-; IS________NPM-NEXT:    call void @bar(i32 [[TMP1]], i32 [[TMP2]])
-; IS________NPM-NEXT:    ret void
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@zed
+; IS__TUNIT_NPM-SAME: (%pair* noalias nocapture nonnull readonly byval([[PAIR:%.*]]) dereferenceable(8) [[DATA:%.*]]) {
+; IS__TUNIT_NPM-NEXT:    [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32*
+; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]], align 1
+; IS__TUNIT_NPM-NEXT:    [[DATA_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]], align 1
+; IS__TUNIT_NPM-NEXT:    call void @bar(i32 [[TMP1]], i32 [[TMP2]])
+; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@zed
 ; IS__CGSCC_OPM-SAME: (%pair* noalias nocapture noundef nonnull readonly byval([[PAIR:%.*]]) dereferenceable(8) [[DATA:%.*]]) {
 ; IS__CGSCC_OPM-NEXT:    call void @bar(%pair* noalias nocapture noundef nonnull readonly byval([[PAIR]]) dereferenceable(8) [[DATA]])
 ; IS__CGSCC_OPM-NEXT:    ret void
+;
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@zed
+; IS__CGSCC_NPM-SAME: (%pair* noalias nocapture nofree nonnull readonly byval([[PAIR:%.*]]) dereferenceable(8) [[DATA:%.*]]) {
+; IS__CGSCC_NPM-NEXT:    [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32*
+; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]], align 1
+; IS__CGSCC_NPM-NEXT:    [[DATA_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
+; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]], align 1
+; IS__CGSCC_NPM-NEXT:    call void @bar(i32 [[TMP1]], i32 [[TMP2]])
+; IS__CGSCC_NPM-NEXT:    ret void
 ;
   call void @bar(%pair* byval(%pair) %Data)
   ret void

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
index 7202cda94b82d..7730bce35ba36 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
@@ -41,7 +41,7 @@ define dso_local void @foo(i32 %N) {
 ; IS__TUNIT_OPM-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
 ; IS__TUNIT_OPM-NEXT:    store float 3.000000e+00, float* [[P]], align 4
 ; IS__TUNIT_OPM-NEXT:    store i32 7, i32* [[N_ADDR]], align 4
-; IS__TUNIT_OPM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef)
+; IS__TUNIT_OPM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef)
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@foo
@@ -52,7 +52,7 @@ define dso_local void @foo(i32 %N) {
 ; IS__TUNIT_NPM-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
 ; IS__TUNIT_NPM-NEXT:    store float 3.000000e+00, float* [[P]], align 4
 ; IS__TUNIT_NPM-NEXT:    store i32 7, i32* [[N_ADDR]], align 4
-; IS__TUNIT_NPM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef)
+; IS__TUNIT_NPM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef)
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@foo
@@ -63,7 +63,7 @@ define dso_local void @foo(i32 %N) {
 ; IS__CGSCC_OPM-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
 ; IS__CGSCC_OPM-NEXT:    store float 3.000000e+00, float* [[P]], align 4
 ; IS__CGSCC_OPM-NEXT:    store i32 7, i32* [[N_ADDR]], align 4
-; IS__CGSCC_OPM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 noundef 4617315517961601024)
+; IS__CGSCC_OPM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 noundef 4617315517961601024)
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@foo
@@ -74,7 +74,7 @@ define dso_local void @foo(i32 %N) {
 ; IS__CGSCC_NPM-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
 ; IS__CGSCC_NPM-NEXT:    store float 3.000000e+00, float* [[P]], align 4
 ; IS__CGSCC_NPM-NEXT:    store i32 7, i32* [[N_ADDR]], align 4
-; IS__CGSCC_NPM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 noundef 4617315517961601024)
+; IS__CGSCC_NPM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 noundef 4617315517961601024)
 ; IS__CGSCC_NPM-NEXT:    ret void
 ;
 entry:
@@ -89,7 +89,7 @@ entry:
 
 define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %N, float* dereferenceable(4) %p, i64 %q) {
 ; IS________OPM-LABEL: define {{[^@]+}}@.omp_outlined.
-; IS________OPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) {
+; IS________OPM-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) {
 ; IS________OPM-NEXT:  entry:
 ; IS________OPM-NEXT:    [[Q_ADDR:%.*]] = alloca i64, align 8
 ; IS________OPM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
@@ -150,7 +150,7 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@.omp_outlined.
-; IS________NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) {
+; IS________NPM-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) {
 ; IS________NPM-NEXT:  entry:
 ; IS________NPM-NEXT:    [[Q_ADDR:%.*]] = alloca i64, align 8
 ; IS________NPM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4

diff  --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll
index 13e525ddd709d..d698f6f6ab1de 100644
--- a/llvm/test/Transforms/Attributor/callbacks.ll
+++ b/llvm/test/Transforms/Attributor/callbacks.ll
@@ -25,7 +25,7 @@ define void @t0_caller(i32* %a) {
 ; IS__TUNIT_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__TUNIT_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__TUNIT_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t0_caller
@@ -37,7 +37,7 @@ define void @t0_caller(i32* %a) {
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__TUNIT_NPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__TUNIT_NPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t0_caller
@@ -49,7 +49,7 @@ define void @t0_caller(i32* %a) {
 ; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 noundef 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 noundef 99, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t0_caller
@@ -61,7 +61,7 @@ define void @t0_caller(i32* %a) {
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_NPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_NPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 noundef 99, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_NPM-NEXT:    ret void
 ;
 entry:
@@ -80,7 +80,7 @@ entry:
 define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
 ;
 ; IS________OPM-LABEL: define {{[^@]+}}@t0_callback_callee
-; IS________OPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
+; IS________OPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
 ; IS________OPM-NEXT:  entry:
 ; IS________OPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________OPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
@@ -89,7 +89,7 @@ define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@t0_callback_callee
-; IS________NPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
+; IS________NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
 ; IS________NPM-NEXT:  entry:
 ; IS________NPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________NPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
@@ -125,7 +125,7 @@ define void @t1_caller(i32* noalias %a) {
 ; IS__TUNIT_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__TUNIT_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__TUNIT_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t1_caller
@@ -137,7 +137,7 @@ define void @t1_caller(i32* noalias %a) {
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__TUNIT_NPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__TUNIT_NPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t1_caller
@@ -149,7 +149,7 @@ define void @t1_caller(i32* noalias %a) {
 ; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t1_caller
@@ -161,7 +161,7 @@ define void @t1_caller(i32* noalias %a) {
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_NPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_NPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_NPM-NEXT:    ret void
 ;
 entry:
@@ -181,7 +181,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ;
 ; IS________OPM: Function Attrs: nosync
 ; IS________OPM-LABEL: define {{[^@]+}}@t1_callback_callee
-; IS________OPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS________OPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS________OPM-NEXT:  entry:
 ; IS________OPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________OPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
@@ -191,7 +191,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ;
 ; IS________NPM: Function Attrs: nosync
 ; IS________NPM-LABEL: define {{[^@]+}}@t1_callback_callee
-; IS________NPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS________NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS________NPM-NEXT:  entry:
 ; IS________NPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________NPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
@@ -225,7 +225,7 @@ define void @t2_caller(i32* noalias %a) {
 ; IS__TUNIT_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__TUNIT_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__TUNIT_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t2_caller
@@ -237,7 +237,7 @@ define void @t2_caller(i32* noalias %a) {
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__TUNIT_NPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__TUNIT_NPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t2_caller
@@ -249,7 +249,7 @@ define void @t2_caller(i32* noalias %a) {
 ; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t2_caller
@@ -261,7 +261,7 @@ define void @t2_caller(i32* noalias %a) {
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_NPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_NPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_NPM-NEXT:    ret void
 ;
 entry:
@@ -282,7 +282,7 @@ entry:
 define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
 ;
 ; IS________OPM-LABEL: define {{[^@]+}}@t2_callback_callee
-; IS________OPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
+; IS________OPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
 ; IS________OPM-NEXT:  entry:
 ; IS________OPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________OPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
@@ -291,7 +291,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@t2_callback_callee
-; IS________NPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
+; IS________NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
 ; IS________NPM-NEXT:  entry:
 ; IS________NPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________NPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
@@ -325,8 +325,8 @@ define void @t3_caller(i32* noalias %a) {
 ; IS__TUNIT_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__TUNIT_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__TUNIT_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
-; IS__TUNIT_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t3_caller
@@ -338,8 +338,8 @@ define void @t3_caller(i32* noalias %a) {
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__TUNIT_NPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__TUNIT_NPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__TUNIT_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
-; IS__TUNIT_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__TUNIT_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t3_caller
@@ -351,8 +351,8 @@ define void @t3_caller(i32* noalias %a) {
 ; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
-; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t3_caller
@@ -364,8 +364,8 @@ define void @t3_caller(i32* noalias %a) {
 ; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_NPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_NPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
-; IS__CGSCC_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_NPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_NPM-NEXT:    ret void
 ;
 entry:
@@ -387,7 +387,7 @@ entry:
 define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
 ;
 ; IS________OPM-LABEL: define {{[^@]+}}@t3_callback_callee
-; IS________OPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
+; IS________OPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
 ; IS________OPM-NEXT:  entry:
 ; IS________OPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________OPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4
@@ -396,7 +396,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a,
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@t3_callback_callee
-; IS________NPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
+; IS________NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) {
 ; IS________NPM-NEXT:  entry:
 ; IS________NPM-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
 ; IS________NPM-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4

diff  --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index 599c8cb6995b0..efceee3a35712 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -713,7 +713,7 @@ define void @test16a(i8 %v, i8** %P) {
 
 define void @test16b(i8 %v, i8** %P) {
 ; CHECK-LABEL: define {{[^@]+}}@test16b
-; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) {
+; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
 ; CHECK-NEXT:    store i8* [[TMP1]], i8** [[P]], align 8
 ; CHECK-NEXT:    tail call void @no_sync_func(i8* nocapture nofree [[TMP1]])
@@ -729,7 +729,7 @@ define void @test16b(i8 %v, i8** %P) {
 
 define void @test16c(i8 %v, i8** %P) {
 ; IS________OPM-LABEL: define {{[^@]+}}@test16c
-; IS________OPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) {
+; IS________OPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) {
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
 ; IS________OPM-NEXT:    store i8* [[TMP1]], i8** [[P]], align 8
 ; IS________OPM-NEXT:    tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) #[[ATTR5]]
@@ -737,7 +737,7 @@ define void @test16c(i8 %v, i8** %P) {
 ; IS________OPM-NEXT:    ret void
 ;
 ; IS________NPM-LABEL: define {{[^@]+}}@test16c
-; IS________NPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) {
+; IS________NPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) {
 ; IS________NPM-NEXT:    [[TMP1:%.*]] = alloca i8, i64 4, align 1
 ; IS________NPM-NEXT:    store i8* [[TMP1]], i8** [[P]], align 8
 ; IS________NPM-NEXT:    tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) #[[ATTR6]]
@@ -752,7 +752,7 @@ define void @test16c(i8 %v, i8** %P) {
 
 define void @test16d(i8 %v, i8** %P) {
 ; CHECK-LABEL: define {{[^@]+}}@test16d
-; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) {
+; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
 ; CHECK-NEXT:    store i8* [[TMP1]], i8** [[P]], align 8
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll
index 88858d8ccb086..cacdf5cfc158c 100644
--- a/llvm/test/Transforms/Attributor/memory_locations.ll
+++ b/llvm/test/Transforms/Attributor/memory_locations.ll
@@ -178,7 +178,7 @@ return:                                           ; preds = %if.end, %if.then
 define dso_local i8* @internal_argmem_only_read(i32* %arg) {
 ; CHECK: Function Attrs: inaccessiblemem_or_argmemonly
 ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_read
-; CHECK-SAME: (i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARG]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP]] to i64
@@ -195,7 +195,7 @@ entry:
 define dso_local i8* @internal_argmem_only_write(i32* %arg) {
 ; CHECK: Function Attrs: inaccessiblemem_or_argmemonly
 ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_write
-; CHECK-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 10, i32* [[ARG]], align 4
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias dereferenceable_or_null(10) i8* @malloc(i64 noundef 10)
@@ -210,16 +210,16 @@ entry:
 define dso_local i8* @internal_argmem_only_rec(i32* %arg) {
 ; IS__TUNIT____: Function Attrs: inaccessiblemem_or_argmemonly
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@internal_argmem_only_rec
-; IS__TUNIT____-SAME: (i32* nocapture [[ARG:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-SAME: (i32* nocapture nofree [[ARG:%.*]]) #[[ATTR1]] {
 ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture align 4 [[ARG]])
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture nofree align 4 [[ARG]])
 ; IS__TUNIT____-NEXT:    ret i8* [[CALL]]
 ;
 ; IS__CGSCC____: Function Attrs: inaccessiblemem_or_argmemonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_argmem_only_rec
-; IS__CGSCC____-SAME: (i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] {
+; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ARG]])
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG]])
 ; IS__CGSCC____-NEXT:    ret i8* [[CALL]]
 ;
 entry:
@@ -230,7 +230,7 @@ entry:
 define internal i8* @internal_argmem_only_rec_1(i32* %arg) {
 ; IS__TUNIT____: Function Attrs: inaccessiblemem_or_argmemonly
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@internal_argmem_only_rec_1
-; IS__TUNIT____-SAME: (i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARG]], align 4
 ; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP]], 0
@@ -243,7 +243,7 @@ define internal i8* @internal_argmem_only_rec_1(i32* %arg) {
 ; IS__TUNIT____-NEXT:    br i1 [[CMP1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]]
 ; IS__TUNIT____:       if.then2:
 ; IS__TUNIT____-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARG]], i64 -1
-; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_2(i32* nocapture nonnull align 4 dereferenceable(4) [[ADD_PTR]])
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_2(i32* nocapture nofree nonnull align 4 dereferenceable(4) [[ADD_PTR]])
 ; IS__TUNIT____-NEXT:    br label [[RETURN]]
 ; IS__TUNIT____:       if.end3:
 ; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARG]], align 4
@@ -256,7 +256,7 @@ define internal i8* @internal_argmem_only_rec_1(i32* %arg) {
 ;
 ; IS__CGSCC____: Function Attrs: inaccessiblemem_or_argmemonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_argmem_only_rec_1
-; IS__CGSCC____-SAME: (i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] {
+; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARG]], align 4
 ; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP]], 0
@@ -269,7 +269,7 @@ define internal i8* @internal_argmem_only_rec_1(i32* %arg) {
 ; IS__CGSCC____-NEXT:    br i1 [[CMP1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]]
 ; IS__CGSCC____:       if.then2:
 ; IS__CGSCC____-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARG]], i64 -1
-; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_2(i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ADD_PTR]])
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_2(i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ADD_PTR]])
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       if.end3:
 ; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARG]], align 4
@@ -312,11 +312,11 @@ return:                                           ; preds = %if.end3, %if.then2,
 define internal i8* @internal_argmem_only_rec_2(i32* %arg) {
 ; CHECK: Function Attrs: inaccessiblemem_or_argmemonly
 ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_rec_2
-; CHECK-SAME: (i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 0, i32* [[ARG]], align 4
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARG]], i64 -1
-; CHECK-NEXT:    [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture nonnull align 4 dereferenceable(4) [[ADD_PTR]])
+; CHECK-NEXT:    [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture nofree nonnull align 4 dereferenceable(4) [[ADD_PTR]])
 ; CHECK-NEXT:    ret i8* [[CALL]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll
index 9d2c6208c6f23..f2e4b4e0dce9f 100644
--- a/llvm/test/Transforms/Attributor/nocapture-1.ll
+++ b/llvm/test/Transforms/Attributor/nocapture-1.ll
@@ -854,13 +854,13 @@ declare void @val_use(i8 %ptr) readonly nounwind willreturn
 define void @ptr_uses(i8* %ptr, i8* %wptr) {
 ; IS__TUNIT____: Function Attrs: nounwind willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@ptr_uses
-; IS__TUNIT____-SAME: (i8* [[PTR:%.*]], i8* nocapture noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR12:[0-9]+]] {
+; IS__TUNIT____-SAME: (i8* [[PTR:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR12:[0-9]+]] {
 ; IS__TUNIT____-NEXT:    store i8 0, i8* [[WPTR]], align 1
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@ptr_uses
-; IS__CGSCC____-SAME: (i8* [[PTR:%.*]], i8* nocapture noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR14]] {
+; IS__CGSCC____-SAME: (i8* [[PTR:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR14]] {
 ; IS__CGSCC____-NEXT:    store i8 0, i8* [[WPTR]], align 1
 ; IS__CGSCC____-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/Attributor/nofree.ll b/llvm/test/Transforms/Attributor/nofree.ll
index 5bf96a2864978..beb3a24723b73 100644
--- a/llvm/test/Transforms/Attributor/nofree.ll
+++ b/llvm/test/Transforms/Attributor/nofree.ll
@@ -347,7 +347,7 @@ declare double @cos(double) nobuiltin nounwind nofree
 define noalias i32* @test13(i64* nocapture readonly %a) {
 ; CHECK: Function Attrs: nounwind
 ; CHECK-LABEL: define {{[^@]+}}@test13
-; CHECK-SAME: (i64* nocapture noundef nonnull readonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (i64* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[A]], align 8
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @malloc(i64 [[TMP0]]) #[[ATTR2]]

diff  --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll
index 9a7a98bea5c65..64dc14dd93760 100644
--- a/llvm/test/Transforms/Attributor/nonnull.ll
+++ b/llvm/test/Transforms/Attributor/nonnull.ll
@@ -909,7 +909,7 @@ f:
 
 define i8 @parent6(i8* %a, i8* %b) {
 ; CHECK-LABEL: define {{[^@]+}}@parent6
-; CHECK-SAME: (i8* nonnull [[A:%.*]], i8* noundef [[B:%.*]]) {
+; CHECK-SAME: (i8* nonnull [[A:%.*]], i8* nofree noundef [[B:%.*]]) {
 ; CHECK-NEXT:    [[C:%.*]] = load volatile i8, i8* [[B]], align 1
 ; CHECK-NEXT:    call void @use1nonnull(i8* nonnull [[A]])
 ; CHECK-NEXT:    ret i8 [[C]]

diff  --git a/llvm/test/Transforms/Attributor/norecurse.ll b/llvm/test/Transforms/Attributor/norecurse.ll
index cc91f56fb3472..c053c2f1578b6 100644
--- a/llvm/test/Transforms/Attributor/norecurse.ll
+++ b/llvm/test/Transforms/Attributor/norecurse.ll
@@ -73,7 +73,7 @@ define void @intrinsic(i8* %dest, i8* %src, i32 %len) {
 ; CHECK: Function Attrs: argmemonly nofree nosync nounwind willreturn
 ; CHECK-LABEL: define {{[^@]+}}@intrinsic
 ; CHECK-SAME: (i8* nocapture nofree writeonly [[DEST:%.*]], i8* nocapture nofree readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR5:[0-9]+]] {
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[DEST]], i8* noalias nocapture nofree readonly [[SRC]], i32 [[LEN]], i1 noundef false) #[[ATTR10:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[DEST]], i8* noalias nocapture nofree readonly [[SRC]], i32 [[LEN]], i1 noundef false) #[[ATTR11:[0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 false)
@@ -93,7 +93,7 @@ define internal i32 @called_by_norecurse() {
 ;
 ; IS__CGSCC____: Function Attrs: norecurse nosync readnone
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@called_by_norecurse
-; IS__CGSCC____-SAME: () #[[ATTR6:[0-9]+]] {
+; IS__CGSCC____-SAME: () #[[ATTR7:[0-9]+]] {
 ; IS__CGSCC____-NEXT:    [[A:%.*]] = call i32 @k()
 ; IS__CGSCC____-NEXT:    ret i32 undef
 ;
@@ -103,13 +103,13 @@ define internal i32 @called_by_norecurse() {
 define void @m() norecurse {
 ; IS__TUNIT____: Function Attrs: norecurse nosync readnone
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@m
-; IS__TUNIT____-SAME: () #[[ATTR6:[0-9]+]] {
+; IS__TUNIT____-SAME: () #[[ATTR7:[0-9]+]] {
 ; IS__TUNIT____-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse() #[[ATTR3]]
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: norecurse nosync readnone
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@m
-; IS__CGSCC____-SAME: () #[[ATTR6]] {
+; IS__CGSCC____-SAME: () #[[ATTR7]] {
 ; IS__CGSCC____-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse()
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -136,7 +136,7 @@ define internal i32 @o() {
 ;
 ; IS__CGSCC____: Function Attrs: norecurse nosync readnone
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@o
-; IS__CGSCC____-SAME: () #[[ATTR6]] {
+; IS__CGSCC____-SAME: () #[[ATTR7]] {
 ; IS__CGSCC____-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse_indirectly()
 ; IS__CGSCC____-NEXT:    ret i32 [[A]]
 ;
@@ -146,13 +146,13 @@ define internal i32 @o() {
 define i32 @p() norecurse {
 ; IS__TUNIT____: Function Attrs: norecurse nosync readnone
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@p
-; IS__TUNIT____-SAME: () #[[ATTR6]] {
+; IS__TUNIT____-SAME: () #[[ATTR7]] {
 ; IS__TUNIT____-NEXT:    [[A:%.*]] = call i32 @o() #[[ATTR3]]
 ; IS__TUNIT____-NEXT:    ret i32 [[A]]
 ;
 ; IS__CGSCC____: Function Attrs: norecurse nosync readnone
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@p
-; IS__CGSCC____-SAME: () #[[ATTR6]] {
+; IS__CGSCC____-SAME: () #[[ATTR7]] {
 ; IS__CGSCC____-NEXT:    [[A:%.*]] = call i32 @o()
 ; IS__CGSCC____-NEXT:    ret i32 [[A]]
 ;
@@ -163,7 +163,7 @@ define i32 @p() norecurse {
 define void @f(i32 %x)  {
 ; CHECK: Function Attrs: nofree nosync nounwind readnone
 ; CHECK-LABEL: define {{[^@]+}}@f
-; CHECK-SAME: (i32 [[X:%.*]]) #[[ATTR7:[0-9]+]] {
+; CHECK-SAME: (i32 [[X:%.*]]) #[[ATTR8:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
@@ -171,7 +171,7 @@ define void @f(i32 %x)  {
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    call void @g() #[[ATTR8:[0-9]+]]
+; CHECK-NEXT:    call void @g() #[[ATTR9:[0-9]+]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
@@ -194,9 +194,9 @@ if.end:
 define void @g() norecurse {
 ; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone
 ; CHECK-LABEL: define {{[^@]+}}@g
-; CHECK-SAME: () #[[ATTR8]] {
+; CHECK-SAME: () #[[ATTR9]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @f(i32 noundef 0) #[[ATTR7]]
+; CHECK-NEXT:    call void @f(i32 noundef 0) #[[ATTR8]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -225,7 +225,7 @@ define i32 @eval_func1(i32 (i32)* , i32) local_unnamed_addr {
 define i32 @eval_func2(i32 (i32)* , i32) local_unnamed_addr null_pointer_is_valid{
 ; CHECK: Function Attrs: null_pointer_is_valid
 ; CHECK-LABEL: define {{[^@]+}}@eval_func2
-; CHECK-SAME: (i32 (i32)* nocapture nofree noundef [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR9:[0-9]+]] {
+; CHECK-SAME: (i32 (i32)* nocapture nofree noundef [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR10:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 [[TMP0]](i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
@@ -263,11 +263,12 @@ Dead:
 ; IS__TUNIT____: attributes #[[ATTR3]] = { nosync readnone }
 ; IS__TUNIT____: attributes #[[ATTR4:[0-9]+]] = { readnone }
 ; IS__TUNIT____: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind willreturn }
-; IS__TUNIT____: attributes #[[ATTR6]] = { norecurse nosync readnone }
-; IS__TUNIT____: attributes #[[ATTR7]] = { nofree nosync nounwind readnone }
-; IS__TUNIT____: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readnone }
-; IS__TUNIT____: attributes #[[ATTR9]] = { null_pointer_is_valid }
-; IS__TUNIT____: attributes #[[ATTR10]] = { willreturn }
+; IS__TUNIT____: attributes #[[ATTR6:[0-9]+]] = { argmemonly nofree nounwind willreturn }
+; IS__TUNIT____: attributes #[[ATTR7]] = { norecurse nosync readnone }
+; IS__TUNIT____: attributes #[[ATTR8]] = { nofree nosync nounwind readnone }
+; IS__TUNIT____: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone }
+; IS__TUNIT____: attributes #[[ATTR10]] = { null_pointer_is_valid }
+; IS__TUNIT____: attributes #[[ATTR11]] = { willreturn }
 ;.
 ; IS__CGSCC____: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn }
 ; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse noreturn nosync nounwind readnone willreturn }
@@ -275,9 +276,10 @@ Dead:
 ; IS__CGSCC____: attributes #[[ATTR3]] = { nosync readnone }
 ; IS__CGSCC____: attributes #[[ATTR4:[0-9]+]] = { readnone }
 ; IS__CGSCC____: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR6]] = { norecurse nosync readnone }
-; IS__CGSCC____: attributes #[[ATTR7]] = { nofree nosync nounwind readnone }
-; IS__CGSCC____: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readnone }
-; IS__CGSCC____: attributes #[[ATTR9]] = { null_pointer_is_valid }
-; IS__CGSCC____: attributes #[[ATTR10]] = { willreturn }
+; IS__CGSCC____: attributes #[[ATTR6:[0-9]+]] = { argmemonly nofree nounwind willreturn }
+; IS__CGSCC____: attributes #[[ATTR7]] = { norecurse nosync readnone }
+; IS__CGSCC____: attributes #[[ATTR8]] = { nofree nosync nounwind readnone }
+; IS__CGSCC____: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone }
+; IS__CGSCC____: attributes #[[ATTR10]] = { null_pointer_is_valid }
+; IS__CGSCC____: attributes #[[ATTR11]] = { willreturn }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll
index d79e5c1d1f271..031fb23b82ed2 100644
--- a/llvm/test/Transforms/Attributor/range.ll
+++ b/llvm/test/Transforms/Attributor/range.ll
@@ -2419,7 +2419,7 @@ f:
 
 define void @spam(i32* %arg, i32* %arg1) {
 ; CHECK-LABEL: define {{[^@]+}}@spam
-; CHECK-SAME: (i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[ARG:%.*]], i32* nocapture nofree readnone [[ARG1:%.*]]) {
+; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[ARG:%.*]], i32* nocapture nofree readnone [[ARG1:%.*]]) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARG]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP]], 4

diff  --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll
index 1295271357095..9beec8f7c0e2f 100644
--- a/llvm/test/Transforms/Attributor/readattrs.ll
+++ b/llvm/test/Transforms/Attributor/readattrs.ll
@@ -96,7 +96,7 @@ declare void @test6_1()
 ; This is not a missed optz'n.
 define void @test6_2(i8** %p, i8* %q) {
 ; CHECK-LABEL: define {{[^@]+}}@test6_2
-; CHECK-SAME: (i8** nocapture noundef nonnull writeonly align 8 dereferenceable(8) [[P:%.*]], i8* [[Q:%.*]]) {
+; CHECK-SAME: (i8** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[P:%.*]], i8* nofree [[Q:%.*]]) {
 ; CHECK-NEXT:    store i8* [[Q]], i8** [[P]], align 8
 ; CHECK-NEXT:    call void @test6_1()
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll
index ab8d87118b68e..36016eb253602 100644
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -522,13 +522,13 @@ define void @complicated_args_sret(%struct.X** %b) {
 ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@complicated_args_sret
 ; IS__TUNIT_OPM-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) #[[ATTR3]] {
-; IS__TUNIT_OPM-NEXT:    call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret(%struct.X) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR6:[0-9]+]]
+; IS__TUNIT_OPM-NEXT:    call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR6:[0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@complicated_args_sret
 ; IS__TUNIT_NPM-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) #[[ATTR3]] {
-; IS__TUNIT_NPM-NEXT:    call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret(%struct.X) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR5:[0-9]+]]
+; IS__TUNIT_NPM-NEXT:    call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR5:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly

diff  --git a/llvm/test/Transforms/Attributor/willreturn.ll b/llvm/test/Transforms/Attributor/willreturn.ll
index 658726c611561..71259af3cac78 100644
--- a/llvm/test/Transforms/Attributor/willreturn.ll
+++ b/llvm/test/Transforms/Attributor/willreturn.ll
@@ -399,7 +399,7 @@ define void @only_exit() local_unnamed_addr #0 {
 define void @conditional_exit(i32 %0, i32* nocapture readonly %1) local_unnamed_addr #0 {
 ; IS__TUNIT____: Function Attrs: noinline nounwind uwtable
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@conditional_exit
-; IS__TUNIT____-SAME: (i32 [[TMP0:%.*]], i32* nocapture readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
+; IS__TUNIT____-SAME: (i32 [[TMP0:%.*]], i32* nocapture nofree readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
 ; IS__TUNIT____-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0
 ; IS__TUNIT____-NEXT:    br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]]
 ; IS__TUNIT____:       4:
@@ -417,7 +417,7 @@ define void @conditional_exit(i32 %0, i32* nocapture readonly %1) local_unnamed_
 ;
 ; IS__CGSCC____: Function Attrs: noinline nounwind uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@conditional_exit
-; IS__CGSCC____-SAME: (i32 [[TMP0:%.*]], i32* nocapture readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+; IS__CGSCC____-SAME: (i32 [[TMP0:%.*]], i32* nocapture nofree readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
 ; IS__CGSCC____-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0
 ; IS__CGSCC____-NEXT:    br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]]
 ; IS__CGSCC____:       4:

diff  --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll
index 633a7616cf30c..d1629b8a86f1b 100644
--- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll
@@ -27,9 +27,17 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 define void @delete_parallel_0() {
 ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_0() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*))
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*))
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_0() {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*))
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_0() {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*))
+; CHECK2-NEXT:    ret void
 entry:
   call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*))
   call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn.0 to void (i32*, i32*, ...)*))
@@ -40,11 +48,21 @@ entry:
 
 define internal void @.omp_outlined.willreturn(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR0:#.*]] {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @unknown() [[ATTR0]]
+; CHECK-NEXT:    call void @unknown() #[[ATTR0]]
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    call void @unknown() #[[ATTR0]]
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    call void @unknown() #[[ATTR0]]
+; CHECK2-NEXT:    ret void
 entry:
   call void @unknown() willreturn
   ret void
@@ -52,11 +70,21 @@ entry:
 
 define internal void @.omp_outlined.willreturn.0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) willreturn {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR1:#.*]] {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @readonly() [[ATTR4:#.*]]
+; CHECK-NEXT:    call void @readonly() #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    call void @readonly() #[[ATTR4:[0-9]+]]
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    call void @readonly() #[[ATTR4:[0-9]+]]
+; CHECK2-NEXT:    ret void
 entry:
   call void @readonly()
   ret void
@@ -64,11 +92,21 @@ entry:
 
 define internal void @.omp_outlined.willreturn.1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR2:#.*]] {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @readnone() [[ATTR0]]
+; CHECK-NEXT:    call void @readnone() #[[ATTR0]]
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    call void @readnone() #[[ATTR0]]
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    call void @readnone() #[[ATTR0]]
+; CHECK2-NEXT:    ret void
 entry:
   call void @readnone() willreturn
   ret void
@@ -76,10 +114,18 @@ entry:
 
 define internal void @.omp_outlined.willreturn.2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR3:#.*]] {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    ret void
 entry:
   ret void
 }
@@ -99,11 +145,23 @@ entry:
 define void @delete_parallel_1() {
 ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_1() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*))
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*))
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_1() {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*))
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_1() {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*))
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+; CHECK2-NEXT:    ret void
 entry:
   call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
   call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*))
@@ -119,6 +177,16 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.
 ; CHECK-NEXT:    call void @unknown()
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    call void @unknown()
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    call void @unknown()
+; CHECK2-NEXT:    ret void
 entry:
   call void @unknown()
   ret void
@@ -126,11 +194,21 @@ entry:
 
 define internal void @.omp_outlined..0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..0
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR4]] {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @readonly() [[ATTR4]]
+; CHECK-NEXT:    call void @readonly() #[[ATTR4]]
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..0
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    call void @readonly() #[[ATTR4]]
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..0
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    call void @readonly() #[[ATTR4]]
+; CHECK2-NEXT:    ret void
 entry:
   call void @readonly()
   ret void
@@ -138,11 +216,21 @@ entry:
 
 define internal void @.omp_outlined..1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR5:#.*]] {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @readnone()
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    call void @readnone()
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    call void @readnone()
+; CHECK2-NEXT:    ret void
 entry:
   call void @readnone()
   ret void
@@ -150,10 +238,18 @@ entry:
 
 define internal void @.omp_outlined..2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR3]] {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    ret void
 entry:
   ret void
 }
@@ -188,16 +284,42 @@ define void @delete_parallel_2() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP:%.*]] = bitcast i32* [[A]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) [[ATTR0]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]]
 ; CHECK-NEXT:    store i32 0, i32* [[A]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A]] to i8*
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_2() {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]]
+; CHECK1-NEXT:    store i32 0, i32* [[A]], align 4
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]])
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_2() {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]]
+; CHECK2-NEXT:    store i32 0, i32* [[A]], align 4
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]])
+; CHECK2-NEXT:    ret void
 entry:
   %a = alloca i32, align 4
   %tmp = bitcast i32* %a to i8*
@@ -214,9 +336,9 @@ entry:
 
 define internal void @.omp_outlined..3(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) [[ATTR6:#.*]] {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() [[ATTR12:#.*]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
@@ -227,6 +349,32 @@ define internal void @.omp_outlined..3(i32* noalias %.global_tid., i32* noalias
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]]
+; CHECK1-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK1-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK1:       if.then:
+; CHECK1-NEXT:    [[TMP:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP]], 1
+; CHECK1-NEXT:    store i32 [[INC]], i32* [[A]], align 4
+; CHECK1-NEXT:    br label [[IF_END]]
+; CHECK1:       if.end:
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]]
+; CHECK2-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK2-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK2:       if.then:
+; CHECK2-NEXT:    [[TMP:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP]], 1
+; CHECK2-NEXT:    store i32 [[INC]], i32* [[A]], align 4
+; CHECK2-NEXT:    br label [[IF_END]]
+; CHECK2:       if.end:
+; CHECK2-NEXT:    ret void
 entry:
   %call = call i32 @omp_get_thread_num()
   %cmp = icmp eq i32 %call, 0
@@ -244,21 +392,51 @@ if.end:                                           ; preds = %if.then, %entry
 
 define internal void @.omp_outlined..4(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK-SAME: (i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
 ; CHECK:       omp_if.then:
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
 ; CHECK-NEXT:    store i32 [[INC]], i32* [[A]], align 4
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]])
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK-NEXT:    br label [[OMP_IF_END]]
 ; CHECK:       omp_if.end:
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK1-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
+; CHECK1:       omp_if.then:
+; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK1-NEXT:    store i32 [[INC]], i32* [[A]], align 4
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK1-NEXT:    br label [[OMP_IF_END]]
+; CHECK1:       omp_if.end:
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK2-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
+; CHECK2:       omp_if.then:
+; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK2-NEXT:    store i32 [[INC]], i32* [[A]], align 4
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK2-NEXT:    br label [[OMP_IF_END]]
+; CHECK2:       omp_if.end:
+; CHECK2-NEXT:    ret void
 entry:
   %tmp = load i32, i32* %.global_tid., align 4
   %tmp1 = call i32 @__kmpc_master(%struct.ident_t* nonnull @0, i32 %tmp)
@@ -286,23 +464,57 @@ declare void @__kmpc_end_master(%struct.ident_t*, i32)
 
 define internal void @.omp_outlined..5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull [[GLOB0]]) [[ATTR12]]
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]]
 ; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
 ; CHECK:       omp_if.then:
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
 ; CHECK-NEXT:    store i32 [[INC]], i32* [[A]], align 4
-; CHECK-NEXT:    call void @__kmpc_end_single(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]])
+; CHECK-NEXT:    call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
 ; CHECK-NEXT:    br label [[OMP_IF_END]]
 ; CHECK:       omp_if.end:
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* noundef nonnull [[GLOB1:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]]
+; CHECK1-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
+; CHECK1:       omp_if.then:
+; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK1-NEXT:    store i32 [[INC]], i32* [[A]], align 4
+; CHECK1-NEXT:    call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK1-NEXT:    br label [[OMP_IF_END]]
+; CHECK1:       omp_if.end:
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]]
+; CHECK2-NEXT:    [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]]
+; CHECK2:       omp_if.then:
+; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK2-NEXT:    store i32 [[INC]], i32* [[A]], align 4
+; CHECK2-NEXT:    call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]])
+; CHECK2-NEXT:    br label [[OMP_IF_END]]
+; CHECK2:       omp_if.end:
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    ret void
 entry:
   %omp_global_thread_num = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
   %tmp = load i32, i32* %.global_tid., align 4
@@ -324,18 +536,18 @@ omp_if.end:                                       ; preds = %entry, %omp_if.then
 
 define internal void @.omp_outlined..6(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK-SAME: (i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A1:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
 ; CHECK-NEXT:    [[TMP:%.*]] = bitcast i32* [[A1]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) [[ATTR0]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]]
 ; CHECK-NEXT:    store i32 1, i32* [[A1]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32**
 ; CHECK-NEXT:    store i32* [[A1]], i32** [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull [[GLOB2:@.*]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
 ; CHECK-NEXT:    switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 ; CHECK-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 ; CHECK-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
@@ -345,7 +557,7 @@ define internal void @.omp_outlined..6(i32* noalias %.global_tid., i32* noalias
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A1]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
-; CHECK-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull [[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
 ; CHECK-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 ; CHECK:       .omp.reduction.case2:
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A1]], align 4
@@ -356,6 +568,70 @@ define internal void @.omp_outlined..6(i32* noalias %.global_tid., i32* noalias
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]])
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK1-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A1:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+; CHECK1-NEXT:    [[TMP:%.*]] = bitcast i32* [[A1]] to i8*
+; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]]
+; CHECK1-NEXT:    store i32 1, i32* [[A1]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32**
+; CHECK1-NEXT:    store i32* [[A1]], i32** [[TMP1]], align 8
+; CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+; CHECK1-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK1-NEXT:    switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+; CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+; CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+; CHECK1-NEXT:    ]
+; CHECK1:       .omp.reduction.case1:
+; CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A1]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+; CHECK1-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
+; CHECK1-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+; CHECK1:       .omp.reduction.case2:
+; CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A1]], align 4
+; CHECK1-NEXT:    [[TMP8:%.*]] = atomicrmw add i32* [[A]], i32 [[TMP7]] monotonic, align 4
+; CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+; CHECK1:       .omp.reduction.default:
+; CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i32* [[A1]] to i8*
+; CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]])
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK2-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A1:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+; CHECK2-NEXT:    [[TMP:%.*]] = bitcast i32* [[A1]] to i8*
+; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]]
+; CHECK2-NEXT:    store i32 1, i32* [[A1]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32**
+; CHECK2-NEXT:    store i32* [[A1]], i32** [[TMP1]], align 8
+; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+; CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK2-NEXT:    switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+; CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+; CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+; CHECK2-NEXT:    ]
+; CHECK2:       .omp.reduction.case1:
+; CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A1]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+; CHECK2-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
+; CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var)
+; CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+; CHECK2:       .omp.reduction.case2:
+; CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[A1]], align 4
+; CHECK2-NEXT:    [[TMP8:%.*]] = atomicrmw add i32* [[A]], i32 [[TMP7]] monotonic, align 4
+; CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+; CHECK2:       .omp.reduction.default:
+; CHECK2-NEXT:    [[TMP9:%.*]] = bitcast i32* [[A1]] to i8*
+; CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]])
+; CHECK2-NEXT:    ret void
 entry:
   %a1 = alloca i32, align 4
   %.omp.reduction.red_list = alloca [1 x i8*], align 8
@@ -393,7 +669,7 @@ entry:
 
 define internal void @.omp.reduction.reduction_func(i8* %arg, i8* %arg1) {
 ; CHECK-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
-; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) [[ATTR9:#.*]] {
+; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32**
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8
@@ -405,6 +681,30 @@ define internal void @.omp.reduction.reduction_func(i8* %arg, i8* %arg1) {
 ; CHECK-NEXT:    store i32 [[ADD]], i32* [[TMP4]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
+; CHECK1-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32**
+; CHECK1-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8
+; CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARG]] to i32**
+; CHECK1-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8
+; CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
+; CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+; CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP4]], align 4
+; CHECK1-NEXT:    ret void
+; CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
+; CHECK2-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32**
+; CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8
+; CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARG]] to i32**
+; CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8
+; CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
+; CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+; CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP4]], align 4
+; CHECK2-NEXT:    ret void
 entry:
   %tmp = bitcast i8* %arg1 to i32**
   %tmp2 = load i32*, i32** %tmp, align 8

diff  --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
index 1afb842311166..4317c7fdcab16 100644
--- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
-; RUN: opt -S -attributor -openmp-opt-cgscc -openmp-opt-enable-merging -enable-new-pm=0 < %s | FileCheck %s
-; RUN: opt -S -aa-pipeline= -passes='attributor,cgscc(openmp-opt-cgscc)' -openmp-opt-enable-merging  < %s | FileCheck %s
+; RUN: opt -S -attributor -openmp-opt-cgscc -openmp-opt-enable-merging -enable-new-pm=0 < %s | FileCheck %s --check-prefix=CHECK1
+; RUN: opt -S -aa-pipeline= -passes='attributor,cgscc(openmp-opt-cgscc)' -openmp-opt-enable-merging  < %s | FileCheck %s --check-prefix=CHECK2
 ; #include <omp.h>
 ; void foo();
 ; void use(int);
@@ -792,10 +792,2935 @@ entry:
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
+; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+; CHECK-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT:    call void @use(i32 [[CONV]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT:    call void @use(i32 [[CONV]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP1]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
+; CHECK-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK:       omp_region.end4:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body5:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
+; CHECK:       seq.par.merged2:
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
+; CHECK:       omp_region.body5.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    br label [[OMP_REGION_END4]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
+; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+; CHECK-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT:    call void @use(i32 [[CONV]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT:    call void @use(i32 [[CONV]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP1]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
+; CHECK-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK:       omp_region.end4:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body5:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
+; CHECK:       seq.par.merged2:
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
+; CHECK:       omp_region.body5.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    br label [[OMP_REGION_END4]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
+; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+; CHECK-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT:    call void @use(i32 [[CONV]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK-NEXT:    call void @use(i32 [[CONV]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP1]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
+; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
+; CHECK-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK:       omp_region.end:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK:       omp_region.end4:
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split.split.split:
+; CHECK-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK:       omp_region.body5:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
+; CHECK:       seq.par.merged2:
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split.split.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
+; CHECK:       omp_region.body5.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    br label [[OMP_REGION_END4]]
+; CHECK:       omp_region.body:
+; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK:       seq.par.merged:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK:       omp.par.merged.split:
+; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK:       omp_region.body.split:
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    br label [[OMP_REGION_END]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK:       omp.par.outlined.exit:
+; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK:       omp.par.exit.split:
+; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    call void (...) @foo()
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  omp.par.entry:
+; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK:       omp.par.outlined.exit.exitStub:
+; CHECK-NEXT:    ret void
+; CHECK:       omp.par.region:
+; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK:       omp.par.merged:
+; CHECK-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK:       omp.par.region.split:
+; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK:       omp.par.pre_finalize:
+; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@merge
+; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK:       omp_parallel:
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -803,10 +3728,8 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0:#.*]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
@@ -818,10 +3741,10 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -829,116 +3752,92 @@ entry:
 ; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[A]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -948,10 +3847,8 @@ entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
@@ -963,19 +3860,19 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -993,35 +3890,29 @@ entry:
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float
-; CHECK-SAME: (float [[F:%.*]], float* nocapture writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 ; CHECK-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1029,10 +3920,8 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
@@ -1044,19 +3933,19 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1074,38 +3963,32 @@ entry:
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1115,10 +3998,8 @@ entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
@@ -1130,16 +4011,16 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
 ; CHECK-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
@@ -1163,35 +4044,29 @@ entry:
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13
 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1199,10 +4074,8 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
@@ -1215,19 +4088,19 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1248,38 +4121,32 @@ entry:
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
 ; CHECK-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1289,10 +4156,8 @@ entry:
 ; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
@@ -1304,19 +4169,19 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1335,28 +4200,22 @@ entry:
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP1]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions
 ; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
@@ -1364,10 +4223,10 @@ entry:
 ; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
 ; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1375,10 +4234,8 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
@@ -1390,10 +4247,10 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1401,36 +4258,30 @@ entry:
 ; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18
-; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19
-; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
 ; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
@@ -1438,10 +4289,10 @@ entry:
 ; CHECK-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
 ; CHECK-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1449,10 +4300,8 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
@@ -1464,19 +4313,19 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1495,47 +4344,41 @@ entry:
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20
-; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21
-; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_3
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1543,10 +4386,8 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
@@ -1558,13 +4399,13 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
-; CHECK-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1572,32 +4413,24 @@ entry:
 ; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq
 ; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
@@ -1605,10 +4438,10 @@ entry:
 ; CHECK-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1618,10 +4451,8 @@ entry:
 ; CHECK-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
@@ -1633,31 +4464,31 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK:       omp_region.end:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK:       omp_region.end4:
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK:       omp.par.merged.split.split.split.split:
-; CHECK-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1676,7 +4507,7 @@ entry:
 ; CHECK:       omp.par.merged.split.split.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
 ; CHECK:       omp_region.body5.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END4]]
 ; CHECK:       omp_region.body:
 ; CHECK-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
@@ -1688,155 +4519,123 @@ entry:
 ; CHECK:       omp.par.merged.split:
 ; CHECK-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK:       omp_region.body.split:
-; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK-NEXT:    br label [[OMP_REGION_END]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3)
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1
 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK:       omp_parallel:
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK:       omp.par.outlined.exit:
 ; CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1844,12 +4643,10 @@ entry:
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
 ; CHECK:       entry.split.split:
 ; CHECK-NEXT:    call void (...) @foo()
-; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
-; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] {
+; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  omp.par.entry:
 ; CHECK-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
@@ -1861,10 +4658,10 @@ entry:
 ; CHECK:       omp.par.region:
 ; CHECK-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK:       omp.par.merged:
-; CHECK-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]])
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK:       entry.split:
 ; CHECK-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -1872,28 +4669,2242 @@ entry:
 ; CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 ; CHECK:       omp.par.pre_finalize:
 ; CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
+; CHECK1-LABEL: define {{[^@]+}}@merge
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_proc_bind
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_num_threads
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_seq_call
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void (...) @foo()
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1:       omp_region.end:
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split.split:
+; CHECK1-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1:       omp_region.body:
+; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1:       seq.par.merged:
+; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK1-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split:
+; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1:       omp_region.body.split:
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float
+; CHECK1-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+; CHECK1-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1:       omp_region.end:
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split.split:
+; CHECK1-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1:       omp_region.body:
+; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1:       seq.par.merged:
+; CHECK1-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK1-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split:
+; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1:       omp_region.body.split:
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK1-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK1-NEXT:    call void @use(i32 [[CONV]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK1-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK1-NEXT:    call void @use(i32 [[CONV]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_firstprivate
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1:       omp_region.end:
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split.split:
+; CHECK1-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK1-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1:       omp_region.body:
+; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1:       seq.par.merged:
+; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK1-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
+; CHECK1-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split:
+; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1:       omp_region.body.split:
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
+; CHECK1-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_sink_lt
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1:       omp_region.end:
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split.split:
+; CHECK1-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1:       omp_region.body:
+; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1:       seq.par.merged:
+; CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK1-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK1-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK1-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
+; CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split:
+; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1:       omp_region.body.split:
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1:       omp_region.end:
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split.split:
+; CHECK1-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1:       omp_region.body:
+; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1:       seq.par.merged:
+; CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK1-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split:
+; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1:       omp_region.body.split:
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP1]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions
+; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18
+; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK1-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK1:       if.then:
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK1-NEXT:    ret void
+; CHECK1:       if.end:
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19
+; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK1-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK1:       if.then:
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK1-NEXT:    ret void
+; CHECK1:       if.end:
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
+; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1:       omp_region.end:
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split.split:
+; CHECK1-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1:       omp_region.body:
+; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1:       seq.par.merged:
+; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK1-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
+; CHECK1-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split:
+; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1:       omp_region.body.split:
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..20
+; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK1-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK1:       if.then:
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK1-NEXT:    ret void
+; CHECK1:       if.end:
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..21
+; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK1-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK1:       if.then:
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK1-NEXT:    ret void
+; CHECK1:       if.end:
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_3
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_3..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..22
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..24
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq
+; CHECK1-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1:       omp_region.end:
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split.split:
+; CHECK1-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK1-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK1:       omp_region.end4:
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split.split.split.split:
+; CHECK1-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK1:       omp_region.body5:
+; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
+; CHECK1:       seq.par.merged2:
+; CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK1-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split.split.split:
+; CHECK1-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
+; CHECK1:       omp_region.body5.split:
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT:    br label [[OMP_REGION_END4]]
+; CHECK1:       omp_region.body:
+; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK1:       seq.par.merged:
+; CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK1-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK1:       omp.par.merged.split:
+; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK1:       omp_region.body.split:
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..25
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..26
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..27
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_3_seq_call
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void (...) @foo()
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void (...) @foo()
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..28
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..30
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..31
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..32
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..33
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@unmergable_3_num_threads
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..34
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..35
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..36
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_2_unmergable_1
+; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK1:       omp.par.outlined.exit:
+; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK1:       omp.par.exit.split:
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK1:       entry.split.split:
+; CHECK1-NEXT:    call void (...) @foo()
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK1:       omp.par.outlined.exit.exitStub:
+; CHECK1-NEXT:    ret void
+; CHECK1:       omp.par.region:
+; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK1:       omp.par.merged:
+; CHECK1-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK1:       entry.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK1:       omp.par.region.split:
+; CHECK1-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK1:       omp.par.pre_finalize:
+; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..37
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..38
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..39
+; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK1-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK1-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_proc_bind
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_num_threads
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_seq_call
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (...) @foo()
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2:       omp_region.end:
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split.split:
+; CHECK2-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2:       omp_region.body:
+; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2:       seq.par.merged:
+; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK2-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split:
+; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2:       omp_region.body.split:
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float
+; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
+; CHECK2-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2:       omp_region.end:
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split.split:
+; CHECK2-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2:       omp_region.body:
+; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2:       seq.par.merged:
+; CHECK2-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK2-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split:
+; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2:       omp_region.body.split:
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK2-NEXT:    call void @use(i32 [[CONV]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[F]], align 4
+; CHECK2-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+; CHECK2-NEXT:    call void @use(i32 [[CONV]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2:       omp_region.end:
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split.split:
+; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2:       omp_region.body:
+; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2:       seq.par.merged:
+; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK2-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
+; CHECK2-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split:
+; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2:       omp_region.body.split:
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..12
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32
+; CHECK2-NEXT:    call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2:       omp_region.end:
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split.split:
+; CHECK2-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2:       omp_region.body:
+; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2:       seq.par.merged:
+; CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK2-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK2-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK2-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
+; CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split:
+; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2:       omp_region.body.split:
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2:       omp_region.end:
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split.split:
+; CHECK2-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2:       omp_region.body:
+; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2:       seq.par.merged:
+; CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK2-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split:
+; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2:       omp_region.body.split:
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..17
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP1]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions
+; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..18
+; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK2:       if.then:
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT:    ret void
+; CHECK2:       if.end:
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..19
+; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK2:       if.then:
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT:    ret void
+; CHECK2:       if.end:
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
+; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2:       omp_region.end:
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split.split:
+; CHECK2-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2:       omp_region.body:
+; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2:       seq.par.merged:
+; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
+; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK2-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
+; CHECK2-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split:
+; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2:       omp_region.body.split:
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..20
+; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4
+; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK2:       if.then:
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT:    ret void
+; CHECK2:       if.end:
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..21
+; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4
+; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK2-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK2:       if.then:
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1)
+; CHECK2-NEXT:    ret void
+; CHECK2:       if.end:
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_3
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_3..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..22
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..23
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..24
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq
+; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2:       omp_region.end:
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split.split:
+; CHECK2-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK2:       omp_region.end4:
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split.split.split.split:
+; CHECK2-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2:       omp_region.body5:
+; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
+; CHECK2:       seq.par.merged2:
+; CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK2-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split.split.split:
+; CHECK2-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
+; CHECK2:       omp_region.body5.split:
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT:    br label [[OMP_REGION_END4]]
+; CHECK2:       omp_region.body:
+; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
+; CHECK2:       seq.par.merged:
+; CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK2-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
+; CHECK2:       omp.par.merged.split:
+; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
+; CHECK2:       omp_region.body.split:
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    br label [[OMP_REGION_END]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..25
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..26
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..27
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_seq_call
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (...) @foo()
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (...) @foo()
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..28
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..29
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..30
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_proc_bind
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..31
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..32
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..33
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_num_threads
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..34
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..35
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..36
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1
+; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
+; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+; CHECK2:       omp.par.outlined.exit:
+; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+; CHECK2:       omp.par.exit.split:
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK2:       entry.split.split:
+; CHECK2-NEXT:    call void (...) @foo()
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+; CHECK2:       omp.par.outlined.exit.exitStub:
+; CHECK2-NEXT:    ret void
+; CHECK2:       omp.par.region:
+; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
+; CHECK2:       omp.par.merged:
+; CHECK2-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; CHECK2:       entry.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
+; CHECK2:       omp.par.region.split:
+; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+; CHECK2:       omp.par.pre_finalize:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..37
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..38
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
+;
+;
+; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..39
+; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) {
+; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
+; CHECK2-NEXT:    ret void
 ;


        


More information about the llvm-commits mailing list