[clang] c204106 - [Clang][OpenMP] Frontend work for sections - D89671
Chirag Khandelwal via cfe-commits
cfe-commits at lists.llvm.org
Thu Apr 29 07:22:34 PDT 2021
Author: Chirag Khandelwal
Date: 2021-04-29T19:52:27+05:30
New Revision: c20410618827b7870fbc86d45ff8e9b11bc169b4
URL: https://github.com/llvm/llvm-project/commit/c20410618827b7870fbc86d45ff8e9b11bc169b4
DIFF: https://github.com/llvm/llvm-project/commit/c20410618827b7870fbc86d45ff8e9b11bc169b4.diff
LOG: [Clang][OpenMP] Frontend work for sections - D89671
This patch is a child of D89671; it contains the Clang
implementation that uses the OpenMP IRBuilder's section
construct.
Co-author: @anchu-rajendran
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D91054
Added:
Modified:
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/test/OpenMP/cancel_codegen.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 0a408837d1c6a..a3c7365fafd87 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1205,7 +1205,7 @@ namespace {
// Builder if one is present.
struct PushAndPopStackRAII {
PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
- bool HasCancel)
+ bool HasCancel, llvm::omp::Directive Kind)
: OMPBuilder(OMPBuilder) {
if (!OMPBuilder)
return;
@@ -1234,8 +1234,7 @@ struct PushAndPopStackRAII {
// TODO: Remove this once we emit parallel regions through the
// OpenMPIRBuilder as it can do this setup internally.
- llvm::OpenMPIRBuilder::FinalizationInfo FI(
- {FiniCB, OMPD_parallel, HasCancel});
+ llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
OMPBuilder->pushFinalizationCB(std::move(FI));
}
~PushAndPopStackRAII() {
@@ -1276,7 +1275,7 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction(
// TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
// parallel region to make cancellation barriers work properly.
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
- PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
+ PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 7d8744651a4ee..83d3dd0fc813c 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3791,6 +3791,64 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
}
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
+
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ const CapturedStmt *ICS = S.getInnermostCapturedStmt();
+ const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
+ const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
+ llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
+ if (CS) {
+ for (const Stmt *SubStmt : CS->children()) {
+ auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
+ FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
+ FiniBB);
+ };
+ SectionCBVector.push_back(SectionCB);
+ }
+ } else {
+ auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
+ FiniBB);
+ };
+ SectionCBVector.push_back(SectionCB);
+ }
+
+ // Privatization callback that performs appropriate action for
+ // shared/private/firstprivate/lastprivate/copyin/... variables.
+ //
+ // TODO: This defaults to shared right now.
+ auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
+ // The next line is appropriate only for variables (Val) with the
+ // data-sharing attribute "shared".
+ ReplVal = &Val;
+
+ return CodeGenIP;
+ };
+
+ CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+ llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+ AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+ Builder.restoreIP(OMPBuilder.createSections(
+ Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
+ S.getSingleClause<OMPNowaitClause>()));
+ return;
+ }
{
auto LPCRegion =
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
@@ -3807,6 +3865,29 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
}
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
+ CodeGenIP, FiniBB);
+ };
+
+ LexicalScope Scope(*this, S.getSourceRange());
+ EmitStopPoint(&S);
+ Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
+
+ return;
+ }
LexicalScope Scope(*this, S.getSourceRange());
EmitStopPoint(&S);
EmitStmt(S.getAssociatedStmt());
@@ -6312,7 +6393,9 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
// TODO: This check is necessary as we only generate `omp parallel` through
// the OpenMPIRBuilder for now.
- if (S.getCancelRegion() == OMPD_parallel) {
+ if (S.getCancelRegion() == OMPD_parallel ||
+ S.getCancelRegion() == OMPD_sections ||
+ S.getCancelRegion() == OMPD_section) {
llvm::Value *IfCondition = nullptr;
if (IfCond)
IfCondition = EmitScalarExpr(IfCond,
diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp
index 80e2e294a60c9..fa84a239c71a7 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -1,28 +1,27 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,CHECK
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,CHECK
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.0 %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t.0 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,IRBUILDER
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP_IRB
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.1 %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t.1 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP_IRB
-// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
-// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP_SIMD
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.2 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t.2 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP_SIMD
-// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,CHECK
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,CHECK
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.3 %s
+// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t.3 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,IRBUILDER
-// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP_IRB
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.4 %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -std=c++11 -include-pch %t.4 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP_IRB
-// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
-// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP_SIMD
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.5 %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t.5 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP_SIMD
// expected-no-diagnostics
#ifndef HEADER
@@ -37,15 +36,10 @@ int main (int argc, char **argv) {
#pragma omp barrier
argv[0][0] += argc;
}
-// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
#pragma omp sections
{
#pragma omp cancel sections
}
-// ALL: call void @__kmpc_for_static_init_4(
-// ALL: call i32 @__kmpc_cancel(
-// ALL: call void @__kmpc_for_static_fini(
-// ALL: call void @__kmpc_barrier(%struct.ident_t*
#pragma omp sections
{
#pragma omp cancel sections
@@ -54,53 +48,18 @@ int main (int argc, char **argv) {
#pragma omp cancel sections
}
}
-// ALL: call void @__kmpc_for_static_init_4(
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
-// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// ALL: [[EXIT]]
-// ALL: br label
-// ALL: [[CONTINUE]]
-// ALL: br label
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
-// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// ALL: [[EXIT]]
-// ALL: br label
-// ALL: [[CONTINUE]]
-// ALL: br label
-// ALL: call void @__kmpc_for_static_fini(
#pragma omp for
for (int i = 0; i < argc; ++i) {
#pragma omp cancel for if(cancel: flag)
}
-// ALL: call void @__kmpc_for_static_init_4(
-// ALL: [[FLAG:%.+]] = load float, float* @{{.+}},
-// ALL: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
-// ALL: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
-// ALL: [[THEN]]
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 2)
-// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// ALL: [[EXIT]]
-// ALL: br label
-// ALL: [[CONTINUE]]
-// ALL: br label
-// ALL: [[ELSE]]
-// ALL: br label
-// ALL: call void @__kmpc_for_static_fini(
-// ALL: call void @__kmpc_barrier(%struct.ident_t*
#pragma omp task
{
#pragma omp cancel taskgroup
}
-// ALL: call i8* @__kmpc_omp_task_alloc(
-// ALL: call i32 @__kmpc_omp_task(
#pragma omp parallel sections
{
#pragma omp cancel sections
}
-// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
#pragma omp parallel sections
{
#pragma omp cancel sections
@@ -109,104 +68,1307 @@ for (int i = 0; i < argc; ++i) {
#pragma omp cancel sections
}
}
-// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
int r = 0;
#pragma omp parallel for reduction(+: r)
for (int i = 0; i < argc; ++i) {
#pragma omp cancel for
r += i;
}
-// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
return argc;
}
-// CHECK: define internal void @{{[^(]+}}(i32* {{[^,]+}}, i32* {{[^,]+}},
-// CHECK: [[FLAG:%.+]] = load float, float* @{{.+}},
-// CHECK: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
-// CHECK: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
-// CHECK: [[THEN]]
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,]+]],
-// CHECK: [[EXIT]]
-// CHECK: br label %[[RETURN:.+]]
-// CHECK: [[ELSE]]
-// The barrier directive should now call __kmpc_cancel_barrier
-// CHECK: call i32 @__kmpc_cancel_barrier(%struct.ident_t*
-// CHECK: br label
-// CHECK: [[RETURN]]
-// CHECK: ret void
-
-// CHECK: define internal i32 @{{[^(]+}}(i32
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 4)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,]+]],
-// CHECK: [[EXIT]]
-// CHECK: br label %[[RETURN:.+]]
-// CHECK: [[RETURN]]
-// CHECK: ret i32 0
-
-// CHECK: define internal void @{{[^(]+}}(i32* {{[^,]+}}, i32* {{[^,]+}})
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call i32 @__kmpc_cancel(
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: ret void
-
-// CHECK: define internal void @{{[^(]+}}(i32* {{[^,]+}}, i32* {{[^,]+}})
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.+]], i32 3)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// CHECK: [[EXIT]]
-// CHECK: br label
-// CHECK: [[CONTINUE]]
-// CHECK: br label
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// CHECK: [[EXIT]]
-// CHECK: br label
-// CHECK: [[CONTINUE]]
-// CHECK: br label
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: ret void
-
-// CHECK: define internal void @{{[^(]+}}(i32* {{[^,]+}}, i32* {{[^,]+}},
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.+]], i32 2)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// CHECK: [[EXIT]]
-// CHECK: br label
-// CHECK: [[CONTINUE]]
-// CHECK: br label
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call i32 @__kmpc_reduce_nowait(
-// CHECK: call void @__kmpc_end_reduce_nowait(
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: ret void
-
-// IRBUILDER: define internal void @main
-
-// IRBUILDER: [[RETURN:omp.par.outlined.exit[^:]*]]
-// IRBUILDER-NEXT: ret void
-// IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}},
-
-// IRBUILDER: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
-// IRBUILDER: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
-// IRBUILDER: [[ELSE]]
-// IRBUILDER-NEXT: br label %[[ELSE2:.*]]
-// IRBUILDER: [[ELSE2]]
-// The barrier directive should now call __kmpc_cancel_barrier
-// IRBUILDER: call i32 @__kmpc_cancel_barrier(%struct.ident_t*
-// IRBUILDER: br label
-// IRBUILDER: [[THEN]]
-// IRBUILDER: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1)
-// IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0
-// IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]]
-// IRBUILDER: [[EXIT]]
-// IRBUILDER: br label %[[RETURN]]
-// IRBUILDER: [[CONTINUE]]
-// IRBUILDER: br label %[[ELSE2:.+]]
-
#endif
+// OMP-LABEL: @main(
+// OMP-NEXT: entry:
+// OMP-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8
+// OMP-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_LB_1:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_UB_2:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_ST_3:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_IL_4:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_IV_5:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[I:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[I24:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// OMP-NEXT: [[R:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// OMP-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// OMP-NEXT: store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]], align 4
+// OMP-NEXT: store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]], align 8
+// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8*** [[ARGV_ADDR]], i32* [[ARGC_ADDR]])
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
+// OMP-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// OMP-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+// OMP-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
+// OMP-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP: omp.inner.for.cond:
+// OMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
+// OMP-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP: omp.inner.for.body:
+// OMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
+// OMP-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
+// OMP-NEXT: ]
+// OMP: .omp.sections.case:
+// OMP-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
+// OMP-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// OMP-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP: .cancel.exit:
+// OMP-NEXT: br label [[CANCEL_EXIT:%.*]]
+// OMP: .cancel.continue:
+// OMP-NEXT: br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP: .omp.sections.exit:
+// OMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP: omp.inner.for.inc:
+// OMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1
+// OMP-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND]]
+// OMP: omp.inner.for.end:
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// OMP-NEXT: br label [[CANCEL_CONT:%.*]]
+// OMP: cancel.cont:
+// OMP-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
+// OMP-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
+// OMP-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4
+// OMP-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
+// OMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
+// OMP-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1
+// OMP-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1
+// OMP-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_SECTIONS_UB_2]], align 4
+// OMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
+// OMP-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]]
+// OMP: omp.inner.for.cond6:
+// OMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
+// OMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
+// OMP-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// OMP-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END18:%.*]]
+// OMP: omp.inner.for.body8:
+// OMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
+// OMP-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [
+// OMP-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE9:%.*]]
+// OMP-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE12:%.*]]
+// OMP-NEXT: ]
+// OMP: .omp.sections.case9:
+// OMP-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
+// OMP-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// OMP-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]]
+// OMP: .cancel.exit10:
+// OMP-NEXT: br label [[CANCEL_EXIT19:%.*]]
+// OMP: cancel.exit:
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// OMP-NEXT: br label [[CANCEL_CONT]]
+// OMP: .cancel.continue11:
+// OMP-NEXT: br label [[DOTOMP_SECTIONS_EXIT15]]
+// OMP: .omp.sections.case12:
+// OMP-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
+// OMP-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
+// OMP-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]]
+// OMP: .cancel.exit13:
+// OMP-NEXT: br label [[CANCEL_EXIT19]]
+// OMP: .cancel.continue14:
+// OMP-NEXT: br label [[DOTOMP_SECTIONS_EXIT15]]
+// OMP: .omp.sections.exit15:
+// OMP-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]]
+// OMP: omp.inner.for.inc16:
+// OMP-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
+// OMP-NEXT: [[INC17:%.*]] = add nsw i32 [[TMP22]], 1
+// OMP-NEXT: store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND6]]
+// OMP: omp.inner.for.end18:
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// OMP-NEXT: br label [[CANCEL_CONT20:%.*]]
+// OMP: cancel.cont20:
+// OMP-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// OMP-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP24]], 0
+// OMP-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// OMP-NEXT: [[SUB22:%.*]] = sub nsw i32 [[DIV]], 1
+// OMP-NEXT: store i32 [[SUB22]], i32* [[DOTCAPTURE_EXPR_21]], align 4
+// OMP-NEXT: store i32 0, i32* [[I]], align 4
+// OMP-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT: [[CMP23:%.*]] = icmp slt i32 0, [[TMP25]]
+// OMP-NEXT: br i1 [[CMP23]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// OMP: omp.precond.then:
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
+// OMP-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
+// OMP-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// OMP-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// OMP-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
+// OMP-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]]
+// OMP-NEXT: br i1 [[CMP25]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// OMP: cond.true:
+// OMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
+// OMP-NEXT: br label [[COND_END:%.*]]
+// OMP: cond.false:
+// OMP-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT: br label [[COND_END]]
+// OMP: cond.end:
+// OMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE]] ], [ [[TMP30]], [[COND_FALSE]] ]
+// OMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// OMP-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND26:%.*]]
+// OMP: omp.inner.for.cond26:
+// OMP-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT: [[CMP27:%.*]] = icmp sle i32 [[TMP32]], [[TMP33]]
+// OMP-NEXT: br i1 [[CMP27]], label [[OMP_INNER_FOR_BODY28:%.*]], label [[OMP_INNER_FOR_END33:%.*]]
+// OMP: omp.inner.for.body28:
+// OMP-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP34]], 1
+// OMP-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// OMP-NEXT: store i32 [[ADD]], i32* [[I24]], align 4
+// OMP-NEXT: [[TMP35:%.*]] = load float, float* @flag, align 4
+// OMP-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP35]], 0.000000e+00
+// OMP-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// OMP: omp_if.then:
+// OMP-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
+// OMP-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
+// OMP-NEXT: br i1 [[TMP37]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]]
+// OMP: .cancel.exit29:
+// OMP-NEXT: br label [[CANCEL_EXIT34:%.*]]
+// OMP: cancel.exit19:
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// OMP-NEXT: br label [[CANCEL_CONT20]]
+// OMP: .cancel.continue30:
+// OMP-NEXT: br label [[OMP_IF_END:%.*]]
+// OMP: omp_if.else:
+// OMP-NEXT: br label [[OMP_IF_END]]
+// OMP: omp_if.end:
+// OMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// OMP: omp.body.continue:
+// OMP-NEXT: br label [[OMP_INNER_FOR_INC31:%.*]]
+// OMP: omp.inner.for.inc31:
+// OMP-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP38]], 1
+// OMP-NEXT: store i32 [[ADD32]], i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND26]]
+// OMP: omp.inner.for.end33:
+// OMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// OMP: omp.loop.exit:
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// OMP-NEXT: br label [[OMP_PRECOND_END]]
+// OMP: cancel.exit34:
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// OMP-NEXT: br label [[CANCEL_CONT35:%.*]]
+// OMP: omp.precond.end:
+// OMP-NEXT: br label [[CANCEL_CONT35]]
+// OMP: cancel.cont35:
+// OMP-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]])
+// OMP-NEXT: [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// OMP-NEXT: [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates*
+// OMP-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0
+// OMP-NEXT: [[TMP42:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP39]])
+// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
+// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*))
+// OMP-NEXT: store i32 0, i32* [[R]], align 4
+// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
+// OMP-NEXT: [[TMP43:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP-NEXT: ret i32 [[TMP43]]
+//
+//
+// OMP-LABEL: @.omp_outlined.(
+// OMP-NEXT: entry:
+// OMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[ARGV_ADDR:%.*]] = alloca i8***, align 8
+// OMP-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT: store i8*** [[ARGV:%.*]], i8**** [[ARGV_ADDR]], align 8
+// OMP-NEXT: store i32* [[ARGC:%.*]], i32** [[ARGC_ADDR]], align 8
+// OMP-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8
+// OMP-NEXT: [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
+// OMP-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4
+// OMP-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
+// OMP-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// OMP: omp_if.then:
+// OMP-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
+// OMP-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1)
+// OMP-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
+// OMP-NEXT: br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP: .cancel.exit:
+// OMP-NEXT: br label [[RETURN:%.*]]
+// OMP: .cancel.continue:
+// OMP-NEXT: br label [[OMP_IF_END:%.*]]
+// OMP: omp_if.else:
+// OMP-NEXT: br label [[OMP_IF_END]]
+// OMP: omp_if.end:
+// OMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4
+// OMP-NEXT: [[CONV:%.*]] = trunc i32 [[TMP7]] to i8
+// OMP-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// OMP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 0
+// OMP-NEXT: [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// OMP-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 0
+// OMP-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
+// OMP-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
+// OMP-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]])
+// OMP-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
+// OMP-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
+// OMP: .cancel.exit2:
+// OMP-NEXT: br label [[RETURN]]
+// OMP: .cancel.continue3:
+// OMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP1]], align 4
+// OMP-NEXT: [[TMP15:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// OMP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP15]], i64 0
+// OMP-NEXT: [[TMP16:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
+// OMP-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP16]], i64 0
+// OMP-NEXT: [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
+// OMP-NEXT: [[CONV6:%.*]] = sext i8 [[TMP17]] to i32
+// OMP-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP14]]
+// OMP-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD]] to i8
+// OMP-NEXT: store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1
+// OMP-NEXT: br label [[RETURN]]
+// OMP: return:
+// OMP-NEXT: ret void
+//
+//
+// OMP-LABEL: @.omp_task_entry.(
+// OMP-NEXT: entry:
+// OMP-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
+// OMP-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
+// OMP-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
+// OMP-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
+// OMP-NEXT: [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// OMP-NEXT: store i32 [[TMP0:%.*]], i32* [[DOTADDR]], align 4
+// OMP-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1:%.*]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// OMP-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// OMP-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// OMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
+// OMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
+// OMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
+// OMP-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// OMP-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
+// OMP-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
+// OMP-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]])
+// OMP-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]])
+// OMP-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
+// OMP-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]])
+// OMP-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13
+// OMP-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13
+// OMP-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13
+// OMP-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13
+// OMP-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13
+// OMP-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13
+// OMP-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13
+// OMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13
+// OMP-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]]
+// OMP-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
+// OMP-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
+// OMP: .cancel.exit.i:
+// OMP-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
+// OMP: .cancel.continue.i:
+// OMP-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]]
+// OMP: .omp_outlined..1.exit:
+// OMP-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP-NEXT: ret i32 0
+//
+//
+// OMP-LABEL: @.omp_outlined..2(
+// OMP-NEXT: entry:
+// OMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
+// OMP-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
+// OMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// OMP-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// OMP-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
+// OMP-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
+// OMP-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP: omp.inner.for.cond:
+// OMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
+// OMP-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP: omp.inner.for.body:
+// OMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
+// OMP-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
+// OMP-NEXT: ]
+// OMP: .omp.sections.case:
+// OMP-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
+// OMP-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+// OMP-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP: .cancel.exit:
+// OMP-NEXT: br label [[CANCEL_EXIT:%.*]]
+// OMP: .cancel.continue:
+// OMP-NEXT: br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP: .omp.sections.exit:
+// OMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP: omp.inner.for.inc:
+// OMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1
+// OMP-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND]]
+// OMP: omp.inner.for.end:
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// OMP-NEXT: br label [[CANCEL_CONT:%.*]]
+// OMP: cancel.cont:
+// OMP-NEXT: ret void
+// OMP: cancel.exit:
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// OMP-NEXT: br label [[CANCEL_CONT]]
+//
+//
+// OMP-LABEL: @.omp_outlined..3(
+// OMP-NEXT: entry:
+// OMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
+// OMP-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
+// OMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// OMP-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// OMP-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
+// OMP-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
+// OMP-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP: omp.inner.for.cond:
+// OMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
+// OMP-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP: omp.inner.for.body:
+// OMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
+// OMP-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
+// OMP-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]]
+// OMP-NEXT: ]
+// OMP: .omp.sections.case:
+// OMP-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
+// OMP-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+// OMP-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP: .cancel.exit:
+// OMP-NEXT: br label [[CANCEL_EXIT:%.*]]
+// OMP: .cancel.continue:
+// OMP-NEXT: br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP: .omp.sections.case1:
+// OMP-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
+// OMP-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+// OMP-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
+// OMP: .cancel.exit2:
+// OMP-NEXT: br label [[CANCEL_EXIT]]
+// OMP: .cancel.continue3:
+// OMP-NEXT: br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP: .omp.sections.exit:
+// OMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP: omp.inner.for.inc:
+// OMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1
+// OMP-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND]]
+// OMP: omp.inner.for.end:
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// OMP-NEXT: br label [[CANCEL_CONT:%.*]]
+// OMP: cancel.cont:
+// OMP-NEXT: ret void
+// OMP: cancel.exit:
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// OMP-NEXT: br label [[CANCEL_CONT]]
+//
+//
+// OMP-LABEL: @.omp_outlined..4(
+// OMP-NEXT: entry:
+// OMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[R_ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[I:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[R3:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[I4:%.*]] = alloca i32, align 4
+// OMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// OMP-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT: store i32* [[ARGC:%.*]], i32** [[ARGC_ADDR]], align 8
+// OMP-NEXT: store i32* [[R:%.*]], i32** [[R_ADDR]], align 8
+// OMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
+// OMP-NEXT: [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
+// OMP-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
+// OMP-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
+// OMP-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// OMP-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// OMP-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP-NEXT: store i32 0, i32* [[I]], align 4
+// OMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
+// OMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// OMP: omp.precond.then:
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
+// OMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// OMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// OMP-NEXT: store i32 0, i32* [[R3]], align 4
+// OMP-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
+// OMP-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// OMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
+// OMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// OMP: cond.true:
+// OMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP-NEXT: br label [[COND_END:%.*]]
+// OMP: cond.false:
+// OMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT: br label [[COND_END]]
+// OMP: cond.end:
+// OMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
+// OMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// OMP-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP: omp.inner.for.cond:
+// OMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// OMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP: omp.inner.for.body:
+// OMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
+// OMP-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// OMP-NEXT: store i32 [[ADD]], i32* [[I4]], align 4
+// OMP-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
+// OMP-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 2)
+// OMP-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// OMP-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP: .cancel.exit:
+// OMP-NEXT: br label [[CANCEL_EXIT:%.*]]
+// OMP: .cancel.continue:
+// OMP-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4
+// OMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[R3]], align 4
+// OMP-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP20]]
+// OMP-NEXT: store i32 [[ADD7]], i32* [[R3]], align 4
+// OMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// OMP: omp.body.continue:
+// OMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP: omp.inner.for.inc:
+// OMP-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1
+// OMP-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT: br label [[OMP_INNER_FOR_COND]]
+// OMP: omp.inner.for.end:
+// OMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// OMP: omp.loop.exit:
+// OMP-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP24]])
+// OMP-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// OMP-NEXT: [[TMP26:%.*]] = bitcast i32* [[R3]] to i8*
+// OMP-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8
+// OMP-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4
+// OMP-NEXT: [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// OMP-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// OMP-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// OMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// OMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// OMP-NEXT: ]
+// OMP: .omp.reduction.case1:
+// OMP-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP1]], align 4
+// OMP-NEXT: [[TMP32:%.*]] = load i32, i32* [[R3]], align 4
+// OMP-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
+// OMP-NEXT: store i32 [[ADD9]], i32* [[TMP1]], align 4
+// OMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB7]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// OMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
+// OMP: cancel.exit:
+// OMP-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
+// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP34]])
+// OMP-NEXT: br label [[CANCEL_CONT:%.*]]
+// OMP: .omp.reduction.case2:
+// OMP-NEXT: [[TMP35:%.*]] = load i32, i32* [[R3]], align 4
+// OMP-NEXT: [[TMP36:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP35]] monotonic, align 4
+// OMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
+// OMP: .omp.reduction.default:
+// OMP-NEXT: br label [[OMP_PRECOND_END]]
+// OMP: omp.precond.end:
+// OMP-NEXT: br label [[CANCEL_CONT]]
+// OMP: cancel.cont:
+// OMP-NEXT: ret void
+//
+//
+// OMP-LABEL: @.omp.reduction.reduction_func(
+// OMP-NEXT: entry:
+// OMP-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
+// OMP-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
+// OMP-NEXT: store i8* [[TMP0:%.*]], i8** [[DOTADDR]], align 8
+// OMP-NEXT: store i8* [[TMP1:%.*]], i8** [[DOTADDR1]], align 8
+// OMP-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// OMP-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// OMP-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// OMP-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// OMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// OMP-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// OMP-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
+// OMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// OMP-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// OMP-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+// OMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
+// OMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
+// OMP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// OMP-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
+// OMP-NEXT: ret void
+//
+//
+// OMP_IRB-LABEL: @main(
+// OMP_IRB-NEXT: entry:
+// OMP_IRB-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8
+// OMP_IRB-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[P_LASTITER26:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[P_LOWERBOUND27:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[P_UPPERBOUND28:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[P_STRIDE29:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTCAPTURE_EXPR_33:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[I:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[I35:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// OMP_IRB-NEXT: [[R:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// OMP_IRB-NEXT: store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]], align 4
+// OMP_IRB-NEXT: store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]], align 8
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// OMP_IRB-NEXT: br label [[OMP_PARALLEL:%.*]]
+// OMP_IRB: omp_parallel:
+// OMP_IRB-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
+// OMP_IRB-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+// OMP_IRB: omp.par.outlined.exit:
+// OMP_IRB-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// OMP_IRB: omp.par.exit.split:
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER:%.*]]
+// OMP_IRB: omp_section_loop.preheader:
+// OMP_IRB-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4
+// OMP_IRB-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4
+// OMP_IRB-NEXT: store i32 1, i32* [[P_STRIDE]], align 4
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
+// OMP_IRB-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
+// OMP_IRB-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
+// OMP_IRB-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
+// OMP_IRB-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_HEADER:%.*]]
+// OMP_IRB: omp_section_loop.header:
+// OMP_IRB-NEXT: [[OMP_SECTION_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER]] ], [ [[OMP_SECTION_LOOP_NEXT:%.*]], [[OMP_SECTION_LOOP_INC:%.*]] ]
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_COND:%.*]]
+// OMP_IRB: omp_section_loop.cond:
+// OMP_IRB-NEXT: [[OMP_SECTION_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV]], [[TMP3]]
+// OMP_IRB-NEXT: br i1 [[OMP_SECTION_LOOP_CMP]], label [[OMP_SECTION_LOOP_BODY:%.*]], label [[OMP_SECTION_LOOP_EXIT:%.*]]
+// OMP_IRB: omp_section_loop.body:
+// OMP_IRB-NEXT: [[TMP4:%.*]] = add i32 [[OMP_SECTION_LOOP_IV]], [[TMP0]]
+// OMP_IRB-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 1
+// OMP_IRB-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0
+// OMP_IRB-NEXT: switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_INC]] [
+// OMP_IRB-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]]
+// OMP_IRB-NEXT: ]
+// OMP_IRB: omp_section_loop.inc:
+// OMP_IRB-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_HEADER]]
+// OMP_IRB: omp_section_loop.exit:
+// OMP_IRB-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]]
+// OMP_IRB: omp_section_loop.after:
+// OMP_IRB-NEXT: br label [[OMP_SECTIONS_END:%.*]]
+// OMP_IRB: omp_sections.end:
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER12:%.*]]
+// OMP_IRB: omp_section_loop.preheader12:
+// OMP_IRB-NEXT: store i32 0, i32* [[P_LOWERBOUND27]], align 4
+// OMP_IRB-NEXT: store i32 1, i32* [[P_UPPERBOUND28]], align 4
+// OMP_IRB-NEXT: store i32 1, i32* [[P_STRIDE29]], align 4
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM30:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]], i32 34, i32* [[P_LASTITER26]], i32* [[P_LOWERBOUND27]], i32* [[P_UPPERBOUND28]], i32* [[P_STRIDE29]], i32 1, i32 1)
+// OMP_IRB-NEXT: [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND27]], align 4
+// OMP_IRB-NEXT: [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND28]], align 4
+// OMP_IRB-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
+// OMP_IRB-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_HEADER13:%.*]]
+// OMP_IRB: omp_section_loop.header13:
+// OMP_IRB-NEXT: [[OMP_SECTION_LOOP_IV19:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER12]] ], [ [[OMP_SECTION_LOOP_NEXT21:%.*]], [[OMP_SECTION_LOOP_INC16:%.*]] ]
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_COND14:%.*]]
+// OMP_IRB: omp_section_loop.cond14:
+// OMP_IRB-NEXT: [[OMP_SECTION_LOOP_CMP20:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV19]], [[TMP10]]
+// OMP_IRB-NEXT: br i1 [[OMP_SECTION_LOOP_CMP20]], label [[OMP_SECTION_LOOP_BODY15:%.*]], label [[OMP_SECTION_LOOP_EXIT17:%.*]]
+// OMP_IRB: omp_section_loop.body15:
+// OMP_IRB-NEXT: [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV19]], [[TMP7]]
+// OMP_IRB-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 1
+// OMP_IRB-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], 0
+// OMP_IRB-NEXT: switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC16]] [
+// OMP_IRB-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE22:%.*]]
+// OMP_IRB-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]]
+// OMP_IRB-NEXT: ]
+// OMP_IRB: omp_section_loop.inc16:
+// OMP_IRB-NEXT: [[OMP_SECTION_LOOP_NEXT21]] = add nuw i32 [[OMP_SECTION_LOOP_IV19]], 1
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_HEADER13]]
+// OMP_IRB: omp_section_loop.exit17:
+// OMP_IRB-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]])
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_AFTER18:%.*]]
+// OMP_IRB: omp_section_loop.after18:
+// OMP_IRB-NEXT: br label [[OMP_SECTIONS_END32:%.*]]
+// OMP_IRB: omp_sections.end32:
+// OMP_IRB-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_IRB-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0
+// OMP_IRB-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// OMP_IRB-NEXT: [[SUB34:%.*]] = sub nsw i32 [[DIV]], 1
+// OMP_IRB-NEXT: store i32 [[SUB34]], i32* [[DOTCAPTURE_EXPR_33]], align 4
+// OMP_IRB-NEXT: store i32 0, i32* [[I]], align 4
+// OMP_IRB-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP16]]
+// OMP_IRB-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// OMP_IRB: omp.precond.then:
+// OMP_IRB-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
+// OMP_IRB-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// OMP_IRB-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// OMP_IRB-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM36:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM36]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// OMP_IRB-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// OMP_IRB-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
+// OMP_IRB-NEXT: br i1 [[CMP37]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// OMP_IRB: cond.true:
+// OMP_IRB-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// OMP_IRB-NEXT: br label [[COND_END:%.*]]
+// OMP_IRB: cond.false:
+// OMP_IRB-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT: br label [[COND_END]]
+// OMP_IRB: cond.end:
+// OMP_IRB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ]
+// OMP_IRB-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// OMP_IRB-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP_IRB: omp.inner.for.cond:
+// OMP_IRB-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT: [[CMP38:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
+// OMP_IRB-NEXT: br i1 [[CMP38]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP_IRB: omp.inner.for.body:
+// OMP_IRB-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
+// OMP_IRB-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL]]
+// OMP_IRB-NEXT: store i32 [[ADD39]], i32* [[I35]], align 4
+// OMP_IRB-NEXT: [[TMP26:%.*]] = load float, float* @flag, align 4
+// OMP_IRB-NEXT: [[TOBOOL40:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
+// OMP_IRB-NEXT: br i1 [[TOBOOL40]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// OMP_IRB: omp_if.then:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
+// OMP_IRB-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]], i32 2)
+// OMP_IRB-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// OMP_IRB-NEXT: br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP_IRB: .cancel.exit:
+// OMP_IRB-NEXT: br label [[CANCEL_EXIT:%.*]]
+// OMP_IRB: omp_section_loop.body.case:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9]], i32 3)
+// OMP_IRB-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0
+// OMP_IRB-NEXT: br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
+// OMP_IRB: omp_section_loop.body.case.split:
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_EXIT]]
+// OMP_IRB: omp_section_loop.body.case.cncl:
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_EXIT]]
+// OMP_IRB: omp_section_loop.body.case22:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM23:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]], i32 3)
+// OMP_IRB-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
+// OMP_IRB-NEXT: br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE22_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE22_CNCL:%.*]]
+// OMP_IRB: omp_section_loop.body.case22.split:
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_EXIT17]]
+// OMP_IRB: omp_section_loop.body.case22.cncl:
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_EXIT17]]
+// OMP_IRB: omp_section_loop.body.case24:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3)
+// OMP_IRB-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0
+// OMP_IRB-NEXT: br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE24_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]]
+// OMP_IRB: omp_section_loop.body.case24.split:
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_EXIT17]]
+// OMP_IRB: omp_section_loop.body.case24.cncl:
+// OMP_IRB-NEXT: br label [[OMP_SECTION_LOOP_EXIT17]]
+// OMP_IRB: .cancel.continue:
+// OMP_IRB-NEXT: br label [[OMP_IF_END:%.*]]
+// OMP_IRB: omp_if.else:
+// OMP_IRB-NEXT: br label [[OMP_IF_END]]
+// OMP_IRB: omp_if.end:
+// OMP_IRB-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// OMP_IRB: omp.body.continue:
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP_IRB: omp.inner.for.inc:
+// OMP_IRB-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT: [[ADD42:%.*]] = add nsw i32 [[TMP35]], 1
+// OMP_IRB-NEXT: store i32 [[ADD42]], i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_COND]]
+// OMP_IRB: omp.inner.for.end:
+// OMP_IRB-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// OMP_IRB: omp.loop.exit:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
+// OMP_IRB-NEXT: br label [[OMP_PRECOND_END]]
+// OMP_IRB: cancel.exit:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM43:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM43]])
+// OMP_IRB-NEXT: br label [[CANCEL_CONT:%.*]]
+// OMP_IRB: omp.precond.end:
+// OMP_IRB-NEXT: br label [[CANCEL_CONT]]
+// OMP_IRB: cancel.cont:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
+// OMP_IRB-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM46]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// OMP_IRB-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates*
+// OMP_IRB-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
+// OMP_IRB-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i8* [[TMP36]])
+// OMP_IRB-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+// OMP_IRB-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
+// OMP_IRB-NEXT: store i32 0, i32* [[R]], align 4
+// OMP_IRB-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
+// OMP_IRB-NEXT: [[TMP40:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_IRB-NEXT: ret i32 [[TMP40]]
+//
+//
+// OMP_IRB-LABEL: @main..omp_par(
+// OMP_IRB-NEXT: omp.par.entry:
+// OMP_IRB-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR:%.*]], align 4
+// OMP_IRB-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+// OMP_IRB-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+// OMP_IRB-NEXT: br label [[OMP_PAR_REGION:%.*]]
+// OMP_IRB: omp.par.outlined.exit.exitStub:
+// OMP_IRB-NEXT: ret void
+// OMP_IRB: omp.par.region:
+// OMP_IRB-NEXT: [[TMP1:%.*]] = load float, float* @flag, align 4
+// OMP_IRB-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
+// OMP_IRB-NEXT: br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]]
+// OMP_IRB: 2:
+// OMP_IRB-NEXT: br label [[TMP3:%.*]]
+// OMP_IRB: 3:
+// OMP_IRB-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR:%.*]], align 4
+// OMP_IRB-NEXT: [[CONV:%.*]] = trunc i32 [[TMP4]] to i8
+// OMP_IRB-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR:%.*]], align 8
+// OMP_IRB-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
+// OMP_IRB-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// OMP_IRB-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
+// OMP_IRB-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX2]], align 1
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+// OMP_IRB-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+// OMP_IRB-NEXT: br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL4:%.*]]
+// OMP_IRB: .cncl4:
+// OMP_IRB-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+// OMP_IRB: .cont:
+// OMP_IRB-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_IRB-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
+// OMP_IRB-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
+// OMP_IRB-NEXT: [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8
+// OMP_IRB-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
+// OMP_IRB-NEXT: [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1
+// OMP_IRB-NEXT: [[CONV7:%.*]] = sext i8 [[TMP12]] to i32
+// OMP_IRB-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], [[TMP9]]
+// OMP_IRB-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD]] to i8
+// OMP_IRB-NEXT: store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1
+// OMP_IRB-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+// OMP_IRB: omp.par.pre_finalize:
+// OMP_IRB-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
+// OMP_IRB: 13:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
+// OMP_IRB-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
+// OMP_IRB-NEXT: br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
+// OMP_IRB: .cncl:
+// OMP_IRB-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
+// OMP_IRB: .split:
+// OMP_IRB-NEXT: br label [[TMP3]]
+//
+//
+// OMP_IRB-LABEL: @.omp_task_entry.(
+// OMP_IRB-NEXT: entry:
+// OMP_IRB-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
+// OMP_IRB-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
+// OMP_IRB-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
+// OMP_IRB-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
+// OMP_IRB-NEXT: [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// OMP_IRB-NEXT: store i32 [[TMP0:%.*]], i32* [[DOTADDR]], align 4
+// OMP_IRB-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1:%.*]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// OMP_IRB-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// OMP_IRB-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// OMP_IRB-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
+// OMP_IRB-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
+// OMP_IRB-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
+// OMP_IRB-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// OMP_IRB-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
+// OMP_IRB-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
+// OMP_IRB-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]])
+// OMP_IRB-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]])
+// OMP_IRB-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
+// OMP_IRB-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]])
+// OMP_IRB-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13
+// OMP_IRB-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) #[[ATTR2:[0-9]+]]
+// OMP_IRB-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) #[[ATTR2]]
+// OMP_IRB-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+// OMP_IRB-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
+// OMP_IRB: .cancel.exit.i:
+// OMP_IRB-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP_IRB-NEXT: br label [[DOTOMP_OUTLINED__EXIT:%.*]]
+// OMP_IRB: .cancel.continue.i:
+// OMP_IRB-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP_IRB-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
+// OMP_IRB: .omp_outlined..exit:
+// OMP_IRB-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP_IRB-NEXT: ret i32 0
+//
+//
+// OMP_IRB-LABEL: @.omp_outlined..1(
+// OMP_IRB-NEXT: entry:
+// OMP_IRB-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP_IRB-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP_IRB-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP_IRB-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
+// OMP_IRB-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB17:[0-9]+]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// OMP_IRB-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
+// OMP_IRB-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
+// OMP_IRB-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP_IRB-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP_IRB: omp.inner.for.cond:
+// OMP_IRB-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
+// OMP_IRB-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP_IRB: omp.inner.for.body:
+// OMP_IRB-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT: switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
+// OMP_IRB-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
+// OMP_IRB-NEXT: ]
+// OMP_IRB: .omp.sections.case:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
+// OMP_IRB-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+// OMP_IRB-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
+// OMP_IRB: .omp.sections.case.split:
+// OMP_IRB-NEXT: br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP_IRB: .omp.sections.case.cncl:
+// OMP_IRB-NEXT: br label [[CANCEL_CONT:%.*]]
+// OMP_IRB: .omp.sections.exit:
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP_IRB: omp.inner.for.inc:
+// OMP_IRB-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
+// OMP_IRB-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_COND]]
+// OMP_IRB: omp.inner.for.end:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+// OMP_IRB-NEXT: br label [[CANCEL_CONT]]
+// OMP_IRB: cancel.cont:
+// OMP_IRB-NEXT: ret void
+// OMP_IRB: cancel.exit:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// OMP_IRB-NEXT: br label [[CANCEL_CONT]]
+//
+//
+// OMP_IRB-LABEL: @.omp_outlined..2(
+// OMP_IRB-NEXT: entry:
+// OMP_IRB-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP_IRB-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP_IRB-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP_IRB-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
+// OMP_IRB-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// OMP_IRB-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1
+// OMP_IRB-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 1
+// OMP_IRB-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP_IRB-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP_IRB: omp.inner.for.cond:
+// OMP_IRB-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
+// OMP_IRB-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP_IRB: omp.inner.for.body:
+// OMP_IRB-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT: switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
+// OMP_IRB-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
+// OMP_IRB-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]]
+// OMP_IRB-NEXT: ]
+// OMP_IRB: .omp.sections.case:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
+// OMP_IRB-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+// OMP_IRB-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
+// OMP_IRB: .omp.sections.case.split:
+// OMP_IRB-NEXT: br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP_IRB: .omp.sections.case.cncl:
+// OMP_IRB-NEXT: br label [[CANCEL_CONT:%.*]]
+// OMP_IRB: .omp.sections.case2:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3)
+// OMP_IRB-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
+// OMP_IRB-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]]
+// OMP_IRB: .omp.sections.case2.split:
+// OMP_IRB-NEXT: br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP_IRB: .omp.sections.case2.cncl:
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_END]]
+// OMP_IRB: .omp.sections.exit:
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP_IRB: omp.inner.for.inc:
+// OMP_IRB-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1
+// OMP_IRB-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_COND]]
+// OMP_IRB: omp.inner.for.end:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]])
+// OMP_IRB-NEXT: br label [[CANCEL_CONT]]
+// OMP_IRB: cancel.cont:
+// OMP_IRB-NEXT: ret void
+// OMP_IRB: cancel.exit:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+// OMP_IRB-NEXT: br label [[CANCEL_CONT]]
+//
+//
+// OMP_IRB-LABEL: @.omp_outlined..3(
+// OMP_IRB-NEXT: entry:
+// OMP_IRB-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT: [[R_ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[I:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[R3:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[I4:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// OMP_IRB-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP_IRB-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP_IRB-NEXT: store i32* [[ARGC:%.*]], i32** [[ARGC_ADDR]], align 8
+// OMP_IRB-NEXT: store i32* [[R:%.*]], i32** [[R_ADDR]], align 8
+// OMP_IRB-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
+// OMP_IRB-NEXT: [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
+// OMP_IRB-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
+// OMP_IRB-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
+// OMP_IRB-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// OMP_IRB-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// OMP_IRB-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP_IRB-NEXT: store i32 0, i32* [[I]], align 4
+// OMP_IRB-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
+// OMP_IRB-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// OMP_IRB: omp.precond.then:
+// OMP_IRB-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4
+// OMP_IRB-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP_IRB-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// OMP_IRB-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// OMP_IRB-NEXT: store i32 0, i32* [[R3]], align 4
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// OMP_IRB-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP_IRB-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
+// OMP_IRB-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// OMP_IRB: cond.true:
+// OMP_IRB-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP_IRB-NEXT: br label [[COND_END:%.*]]
+// OMP_IRB: cond.false:
+// OMP_IRB-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT: br label [[COND_END]]
+// OMP_IRB: cond.end:
+// OMP_IRB-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
+// OMP_IRB-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// OMP_IRB-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP_IRB: omp.inner.for.cond:
+// OMP_IRB-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
+// OMP_IRB-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP_IRB: omp.inner.for.body:
+// OMP_IRB-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
+// OMP_IRB-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// OMP_IRB-NEXT: store i32 [[ADD]], i32* [[I4]], align 4
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]])
+// OMP_IRB-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2)
+// OMP_IRB-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// OMP_IRB-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP_IRB: .cancel.exit:
+// OMP_IRB-NEXT: br label [[CANCEL_EXIT:%.*]]
+// OMP_IRB: .cancel.continue:
+// OMP_IRB-NEXT: [[TMP16:%.*]] = load i32, i32* [[I4]], align 4
+// OMP_IRB-NEXT: [[TMP17:%.*]] = load i32, i32* [[R3]], align 4
+// OMP_IRB-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+// OMP_IRB-NEXT: store i32 [[ADD8]], i32* [[R3]], align 4
+// OMP_IRB-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// OMP_IRB: omp.body.continue:
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP_IRB: omp.inner.for.inc:
+// OMP_IRB-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1
+// OMP_IRB-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT: br label [[OMP_INNER_FOR_COND]]
+// OMP_IRB: omp.inner.for.end:
+// OMP_IRB-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// OMP_IRB: omp.loop.exit:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// OMP_IRB-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// OMP_IRB-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8*
+// OMP_IRB-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
+// OMP_IRB-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// OMP_IRB-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// OMP_IRB-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// OMP_IRB-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// OMP_IRB-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// OMP_IRB-NEXT: ]
+// OMP_IRB: .omp.reduction.case1:
+// OMP_IRB-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP1]], align 4
+// OMP_IRB-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4
+// OMP_IRB-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
+// OMP_IRB-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4
+// OMP_IRB-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// OMP_IRB-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
+// OMP_IRB: cancel.exit:
+// OMP_IRB-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
+// OMP_IRB-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
+// OMP_IRB-NEXT: br label [[CANCEL_CONT:%.*]]
+// OMP_IRB: .omp.reduction.case2:
+// OMP_IRB-NEXT: [[TMP25:%.*]] = load i32, i32* [[R3]], align 4
+// OMP_IRB-NEXT: [[TMP26:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP25]] monotonic, align 4
+// OMP_IRB-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
+// OMP_IRB: .omp.reduction.default:
+// OMP_IRB-NEXT: br label [[OMP_PRECOND_END]]
+// OMP_IRB: omp.precond.end:
+// OMP_IRB-NEXT: br label [[CANCEL_CONT]]
+// OMP_IRB: cancel.cont:
+// OMP_IRB-NEXT: ret void
+//
+//
+// OMP_IRB-LABEL: @.omp.reduction.reduction_func(
+// OMP_IRB-NEXT: entry:
+// OMP_IRB-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
+// OMP_IRB-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
+// OMP_IRB-NEXT: store i8* [[TMP0:%.*]], i8** [[DOTADDR]], align 8
+// OMP_IRB-NEXT: store i8* [[TMP1:%.*]], i8** [[DOTADDR1]], align 8
+// OMP_IRB-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// OMP_IRB-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// OMP_IRB-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// OMP_IRB-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// OMP_IRB-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// OMP_IRB-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// OMP_IRB-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
+// OMP_IRB-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// OMP_IRB-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// OMP_IRB-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+// OMP_IRB-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
+// OMP_IRB-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
+// OMP_IRB-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// OMP_IRB-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4
+// OMP_IRB-NEXT: ret void
+//
+//
+// OMP_SIMD-LABEL: @main(
+// OMP_SIMD-NEXT: entry:
+// OMP_SIMD-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// OMP_SIMD-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// OMP_SIMD-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8
+// OMP_SIMD-NEXT: [[I:%.*]] = alloca i32, align 4
+// OMP_SIMD-NEXT: [[R:%.*]] = alloca i32, align 4
+// OMP_SIMD-NEXT: [[I6:%.*]] = alloca i32, align 4
+// OMP_SIMD-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// OMP_SIMD-NEXT: store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT: store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]], align 8
+// OMP_SIMD-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT: [[CONV:%.*]] = trunc i32 [[TMP0]] to i8
+// OMP_SIMD-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
+// OMP_SIMD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP1]], i64 0
+// OMP_SIMD-NEXT: [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// OMP_SIMD-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0
+// OMP_SIMD-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
+// OMP_SIMD-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
+// OMP_SIMD-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0
+// OMP_SIMD-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8
+// OMP_SIMD-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0
+// OMP_SIMD-NEXT: [[TMP6:%.*]] = load i8, i8* [[ARRAYIDX3]], align 1
+// OMP_SIMD-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32
+// OMP_SIMD-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV4]], [[TMP3]]
+// OMP_SIMD-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD]] to i8
+// OMP_SIMD-NEXT: store i8 [[CONV5]], i8* [[ARRAYIDX3]], align 1
+// OMP_SIMD-NEXT: store i32 0, i32* [[I]], align 4
+// OMP_SIMD-NEXT: br label [[FOR_COND:%.*]]
+// OMP_SIMD: for.cond:
+// OMP_SIMD-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
+// OMP_SIMD-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]]
+// OMP_SIMD-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// OMP_SIMD: for.body:
+// OMP_SIMD-NEXT: br label [[FOR_INC:%.*]]
+// OMP_SIMD: for.inc:
+// OMP_SIMD-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4
+// OMP_SIMD-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
+// OMP_SIMD-NEXT: store i32 [[INC]], i32* [[I]], align 4
+// OMP_SIMD-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
+// OMP_SIMD: for.end:
+// OMP_SIMD-NEXT: store i32 0, i32* [[R]], align 4
+// OMP_SIMD-NEXT: store i32 0, i32* [[I6]], align 4
+// OMP_SIMD-NEXT: br label [[FOR_COND7:%.*]]
+// OMP_SIMD: for.cond7:
+// OMP_SIMD-NEXT: [[TMP10:%.*]] = load i32, i32* [[I6]], align 4
+// OMP_SIMD-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// OMP_SIMD-NEXT: br i1 [[CMP8]], label [[FOR_BODY9:%.*]], label [[FOR_END13:%.*]]
+// OMP_SIMD: for.body9:
+// OMP_SIMD-NEXT: [[TMP12:%.*]] = load i32, i32* [[I6]], align 4
+// OMP_SIMD-NEXT: [[TMP13:%.*]] = load i32, i32* [[R]], align 4
+// OMP_SIMD-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+// OMP_SIMD-NEXT: store i32 [[ADD10]], i32* [[R]], align 4
+// OMP_SIMD-NEXT: br label [[FOR_INC11:%.*]]
+// OMP_SIMD: for.inc11:
+// OMP_SIMD-NEXT: [[TMP14:%.*]] = load i32, i32* [[I6]], align 4
+// OMP_SIMD-NEXT: [[INC12:%.*]] = add nsw i32 [[TMP14]], 1
+// OMP_SIMD-NEXT: store i32 [[INC12]], i32* [[I6]], align 4
+// OMP_SIMD-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP4:![0-9]+]]
+// OMP_SIMD: for.end13:
+// OMP_SIMD-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT: ret i32 [[TMP15]]
+//
More information about the cfe-commits mailing list