[clang] c204106 - [Clang][OpenMP] Frontend work for sections - D89671

Chirag Khandelwal via cfe-commits cfe-commits at lists.llvm.org
Thu Apr 29 07:22:34 PDT 2021


Author: Chirag Khandelwal
Date: 2021-04-29T19:52:27+05:30
New Revision: c20410618827b7870fbc86d45ff8e9b11bc169b4

URL: https://github.com/llvm/llvm-project/commit/c20410618827b7870fbc86d45ff8e9b11bc169b4
DIFF: https://github.com/llvm/llvm-project/commit/c20410618827b7870fbc86d45ff8e9b11bc169b4.diff

LOG: [Clang][OpenMP] Frontend work for sections - D89671

This patch is a child of D89671 and contains the Clang
implementation that uses the OpenMP IRBuilder's section
construct.

Co-author: @anchu-rajendran

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D91054

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    clang/test/OpenMP/cancel_codegen.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 0a408837d1c6a..a3c7365fafd87 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1205,7 +1205,7 @@ namespace {
 // Builder if one is present.
 struct PushAndPopStackRAII {
   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
-                      bool HasCancel)
+                      bool HasCancel, llvm::omp::Directive Kind)
       : OMPBuilder(OMPBuilder) {
     if (!OMPBuilder)
       return;
@@ -1234,8 +1234,7 @@ struct PushAndPopStackRAII {
 
     // TODO: Remove this once we emit parallel regions through the
     //       OpenMPIRBuilder as it can do this setup internally.
-    llvm::OpenMPIRBuilder::FinalizationInfo FI(
-        {FiniCB, OMPD_parallel, HasCancel});
+    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
     OMPBuilder->pushFinalizationCB(std::move(FI));
   }
   ~PushAndPopStackRAII() {
@@ -1276,7 +1275,7 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction(
   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
   //       parallel region to make cancellation barriers work properly.
   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
-  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
+  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                     HasCancel, OutlinedHelperName);
   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 7d8744651a4ee..83d3dd0fc813c 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3791,6 +3791,64 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
 }
 
 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
+
+    auto FiniCB = [this](InsertPointTy IP) {
+      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+    };
+
+    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
+    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
+    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
+    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
+    if (CS) {
+      for (const Stmt *SubStmt : CS->children()) {
+        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
+                                         InsertPointTy CodeGenIP,
+                                         llvm::BasicBlock &FiniBB) {
+          OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
+                                                         FiniBB);
+          OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
+                                                 FiniBB);
+        };
+        SectionCBVector.push_back(SectionCB);
+      }
+    } else {
+      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
+                                            InsertPointTy CodeGenIP,
+                                            llvm::BasicBlock &FiniBB) {
+        OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+        OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
+                                               FiniBB);
+      };
+      SectionCBVector.push_back(SectionCB);
+    }
+
+    // Privatization callback that performs appropriate action for
+    // shared/private/firstprivate/lastprivate/copyin/... variables.
+    //
+    // TODO: This defaults to shared right now.
+    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
+      // The next line is appropriate only for variables (Val) with the
+      // data-sharing attribute "shared".
+      ReplVal = &Val;
+
+      return CodeGenIP;
+    };
+
+    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
+    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+    Builder.restoreIP(OMPBuilder.createSections(
+        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
+        S.getSingleClause<OMPNowaitClause>()));
+    return;
+  }
   {
     auto LPCRegion =
         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
@@ -3807,6 +3865,29 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
 }
 
 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
+    auto FiniCB = [this](InsertPointTy IP) {
+      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+    };
+
+    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
+                                                   InsertPointTy CodeGenIP,
+                                                   llvm::BasicBlock &FiniBB) {
+      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
+                                             CodeGenIP, FiniBB);
+    };
+
+    LexicalScope Scope(*this, S.getSourceRange());
+    EmitStopPoint(&S);
+    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
+
+    return;
+  }
   LexicalScope Scope(*this, S.getSourceRange());
   EmitStopPoint(&S);
   EmitStmt(S.getAssociatedStmt());
@@ -6312,7 +6393,9 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
     llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
     // TODO: This check is necessary as we only generate `omp parallel` through
     // the OpenMPIRBuilder for now.
-    if (S.getCancelRegion() == OMPD_parallel) {
+    if (S.getCancelRegion() == OMPD_parallel ||
+        S.getCancelRegion() == OMPD_sections ||
+        S.getCancelRegion() == OMPD_section) {
       llvm::Value *IfCondition = nullptr;
       if (IfCond)
         IfCondition = EmitScalarExpr(IfCond,

diff  --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp
index 80e2e294a60c9..fa84a239c71a7 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -1,28 +1,27 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,CHECK
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,CHECK
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.0 %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t.0 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP
 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,IRBUILDER
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP_IRB
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.1 %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t.1 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP_IRB
 
-// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
-// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP_SIMD
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.2 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t.2 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP_SIMD
 
-// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,CHECK
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,CHECK
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.3 %s
+// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t.3 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP
 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,IRBUILDER
-// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP_IRB
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.4 %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -std=c++11 -include-pch %t.4 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP_IRB
 
-// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
-// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OMP_SIMD
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t.5 %s
+// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t.5 -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefix=OMP_SIMD
 
 // expected-no-diagnostics
 #ifndef HEADER
@@ -37,15 +36,10 @@ int main (int argc, char **argv) {
 #pragma omp barrier
   argv[0][0] += argc;
 }
-// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
 #pragma omp sections
 {
 #pragma omp cancel sections
 }
-// ALL: call void @__kmpc_for_static_init_4(
-// ALL: call i32 @__kmpc_cancel(
-// ALL: call void @__kmpc_for_static_fini(
-// ALL: call void @__kmpc_barrier(%struct.ident_t*
 #pragma omp sections
 {
 #pragma omp cancel sections
@@ -54,53 +48,18 @@ int main (int argc, char **argv) {
 #pragma omp cancel sections
   }
 }
-// ALL: call void @__kmpc_for_static_init_4(
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
-// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// ALL: [[EXIT]]
-// ALL: br label
-// ALL: [[CONTINUE]]
-// ALL: br label
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
-// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// ALL: [[EXIT]]
-// ALL: br label
-// ALL: [[CONTINUE]]
-// ALL: br label
-// ALL: call void @__kmpc_for_static_fini(
 #pragma omp for
 for (int i = 0; i < argc; ++i) {
 #pragma omp cancel for if(cancel: flag)
 }
-// ALL: call void @__kmpc_for_static_init_4(
-// ALL: [[FLAG:%.+]] = load float, float* @{{.+}},
-// ALL: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
-// ALL: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
-// ALL: [[THEN]]
-// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 2)
-// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// ALL: [[EXIT]]
-// ALL: br label
-// ALL: [[CONTINUE]]
-// ALL: br label
-// ALL: [[ELSE]]
-// ALL: br label
-// ALL: call void @__kmpc_for_static_fini(
-// ALL: call void @__kmpc_barrier(%struct.ident_t*
 #pragma omp task
 {
 #pragma omp cancel taskgroup
 }
-// ALL: call i8* @__kmpc_omp_task_alloc(
-// ALL: call i32 @__kmpc_omp_task(
 #pragma omp parallel sections
 {
 #pragma omp cancel sections
 }
-// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
 #pragma omp parallel sections
 {
 #pragma omp cancel sections
@@ -109,104 +68,1307 @@ for (int i = 0; i < argc; ++i) {
 #pragma omp cancel sections
   }
 }
-// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
 int r = 0;
 #pragma omp parallel for reduction(+: r)
 for (int i = 0; i < argc; ++i) {
 #pragma omp cancel for
   r += i;
 }
-// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
   return argc;
 }
 
-// CHECK: define internal void @{{[^(]+}}(i32* {{[^,]+}}, i32* {{[^,]+}},
-// CHECK: [[FLAG:%.+]] = load float, float* @{{.+}},
-// CHECK: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
-// CHECK: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
-// CHECK: [[THEN]]
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,]+]],
-// CHECK: [[EXIT]]
-// CHECK: br label %[[RETURN:.+]]
-// CHECK: [[ELSE]]
-// The barrier directive should now call __kmpc_cancel_barrier
-// CHECK: call i32 @__kmpc_cancel_barrier(%struct.ident_t*
-// CHECK: br label
-// CHECK: [[RETURN]]
-// CHECK: ret void
-
-// CHECK: define internal i32 @{{[^(]+}}(i32
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 4)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,]+]],
-// CHECK: [[EXIT]]
-// CHECK: br label %[[RETURN:.+]]
-// CHECK: [[RETURN]]
-// CHECK: ret i32 0
-
-// CHECK: define internal void @{{[^(]+}}(i32* {{[^,]+}}, i32* {{[^,]+}})
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: call i32 @__kmpc_cancel(
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: ret void
-
-// CHECK: define internal void @{{[^(]+}}(i32* {{[^,]+}}, i32* {{[^,]+}})
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.+]], i32 3)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// CHECK: [[EXIT]]
-// CHECK: br label
-// CHECK: [[CONTINUE]]
-// CHECK: br label
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// CHECK: [[EXIT]]
-// CHECK: br label
-// CHECK: [[CONTINUE]]
-// CHECK: br label
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: ret void
-
-// CHECK: define internal void @{{[^(]+}}(i32* {{[^,]+}}, i32* {{[^,]+}},
-// CHECK: call void @__kmpc_for_static_init_4(
-// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.+]], i32 2)
-// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
-// CHECK: [[EXIT]]
-// CHECK: br label
-// CHECK: [[CONTINUE]]
-// CHECK: br label
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call i32 @__kmpc_reduce_nowait(
-// CHECK: call void @__kmpc_end_reduce_nowait(
-// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: ret void
-
-// IRBUILDER: define internal void @main
-
-// IRBUILDER: [[RETURN:omp.par.outlined.exit[^:]*]]
-// IRBUILDER-NEXT: ret void
-// IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}},
-
-// IRBUILDER: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
-// IRBUILDER: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
-// IRBUILDER: [[ELSE]]
-// IRBUILDER-NEXT:   br label %[[ELSE2:.*]]
-// IRBUILDER: [[ELSE2]]
-// The barrier directive should now call __kmpc_cancel_barrier
-// IRBUILDER: call i32 @__kmpc_cancel_barrier(%struct.ident_t*
-// IRBUILDER: br label
-// IRBUILDER: [[THEN]]
-// IRBUILDER: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1)
-// IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0
-// IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]]
-// IRBUILDER: [[EXIT]]
-// IRBUILDER: br label %[[RETURN]]
-// IRBUILDER: [[CONTINUE]]
-// IRBUILDER: br label %[[ELSE2:.+]]
-
 #endif
+// OMP-LABEL: @main(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
+// OMP-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_LB_1:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_UB_2:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_ST_3:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_IL_4:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_IV_5:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[I24:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// OMP-NEXT:    [[R:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// OMP-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// OMP-NEXT:    store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]], align 4
+// OMP-NEXT:    store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]], align 8
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8*** [[ARGV_ADDR]], i32* [[ARGC_ADDR]])
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
+// OMP-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// OMP-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+// OMP-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
+// OMP-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP:       omp.inner.for.cond:
+// OMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
+// OMP-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP:       omp.inner.for.body:
+// OMP-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
+// OMP-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
+// OMP-NEXT:    ]
+// OMP:       .omp.sections.case:
+// OMP-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
+// OMP-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// OMP-NEXT:    br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP:       .cancel.exit:
+// OMP-NEXT:    br label [[CANCEL_EXIT:%.*]]
+// OMP:       .cancel.continue:
+// OMP-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP:       .omp.sections.exit:
+// OMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP:       omp.inner.for.inc:
+// OMP-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP10]], 1
+// OMP-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// OMP:       omp.inner.for.end:
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// OMP-NEXT:    br label [[CANCEL_CONT:%.*]]
+// OMP:       cancel.cont:
+// OMP-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
+// OMP-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
+// OMP-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4
+// OMP-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1)
+// OMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
+// OMP-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1
+// OMP-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1
+// OMP-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_SECTIONS_UB_2]], align 4
+// OMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_1]], align 4
+// OMP-NEXT:    store i32 [[TMP14]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND6:%.*]]
+// OMP:       omp.inner.for.cond6:
+// OMP-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
+// OMP-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4
+// OMP-NEXT:    [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// OMP-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END18:%.*]]
+// OMP:       omp.inner.for.body8:
+// OMP-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
+// OMP-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [
+// OMP-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE9:%.*]]
+// OMP-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE12:%.*]]
+// OMP-NEXT:    ]
+// OMP:       .omp.sections.case9:
+// OMP-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
+// OMP-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// OMP-NEXT:    br i1 [[TMP19]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]]
+// OMP:       .cancel.exit10:
+// OMP-NEXT:    br label [[CANCEL_EXIT19:%.*]]
+// OMP:       cancel.exit:
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// OMP-NEXT:    br label [[CANCEL_CONT]]
+// OMP:       .cancel.continue11:
+// OMP-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
+// OMP:       .omp.sections.case12:
+// OMP-NEXT:    [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3)
+// OMP-NEXT:    [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
+// OMP-NEXT:    br i1 [[TMP21]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]]
+// OMP:       .cancel.exit13:
+// OMP-NEXT:    br label [[CANCEL_EXIT19]]
+// OMP:       .cancel.continue14:
+// OMP-NEXT:    br label [[DOTOMP_SECTIONS_EXIT15]]
+// OMP:       .omp.sections.exit15:
+// OMP-NEXT:    br label [[OMP_INNER_FOR_INC16:%.*]]
+// OMP:       omp.inner.for.inc16:
+// OMP-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4
+// OMP-NEXT:    [[INC17:%.*]] = add nsw i32 [[TMP22]], 1
+// OMP-NEXT:    store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND6]]
+// OMP:       omp.inner.for.end18:
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// OMP-NEXT:    br label [[CANCEL_CONT20:%.*]]
+// OMP:       cancel.cont20:
+// OMP-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// OMP-NEXT:    [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP-NEXT:    store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP24]], 0
+// OMP-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// OMP-NEXT:    [[SUB22:%.*]] = sub nsw i32 [[DIV]], 1
+// OMP-NEXT:    store i32 [[SUB22]], i32* [[DOTCAPTURE_EXPR_21]], align 4
+// OMP-NEXT:    store i32 0, i32* [[I]], align 4
+// OMP-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT:    [[CMP23:%.*]] = icmp slt i32 0, [[TMP25]]
+// OMP-NEXT:    br i1 [[CMP23]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// OMP:       omp.precond.then:
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// OMP-NEXT:    [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
+// OMP-NEXT:    store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// OMP-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// OMP-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
+// OMP-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]]
+// OMP-NEXT:    br i1 [[CMP25]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// OMP:       cond.true:
+// OMP-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
+// OMP-NEXT:    br label [[COND_END:%.*]]
+// OMP:       cond.false:
+// OMP-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT:    br label [[COND_END]]
+// OMP:       cond.end:
+// OMP-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE]] ], [ [[TMP30]], [[COND_FALSE]] ]
+// OMP-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// OMP-NEXT:    store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND26:%.*]]
+// OMP:       omp.inner.for.cond26:
+// OMP-NEXT:    [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT:    [[CMP27:%.*]] = icmp sle i32 [[TMP32]], [[TMP33]]
+// OMP-NEXT:    br i1 [[CMP27]], label [[OMP_INNER_FOR_BODY28:%.*]], label [[OMP_INNER_FOR_END33:%.*]]
+// OMP:       omp.inner.for.body28:
+// OMP-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP34]], 1
+// OMP-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// OMP-NEXT:    store i32 [[ADD]], i32* [[I24]], align 4
+// OMP-NEXT:    [[TMP35:%.*]] = load float, float* @flag, align 4
+// OMP-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP35]], 0.000000e+00
+// OMP-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// OMP:       omp_if.then:
+// OMP-NEXT:    [[TMP36:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2)
+// OMP-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
+// OMP-NEXT:    br i1 [[TMP37]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]]
+// OMP:       .cancel.exit29:
+// OMP-NEXT:    br label [[CANCEL_EXIT34:%.*]]
+// OMP:       cancel.exit19:
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
+// OMP-NEXT:    br label [[CANCEL_CONT20]]
+// OMP:       .cancel.continue30:
+// OMP-NEXT:    br label [[OMP_IF_END:%.*]]
+// OMP:       omp_if.else:
+// OMP-NEXT:    br label [[OMP_IF_END]]
+// OMP:       omp_if.end:
+// OMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// OMP:       omp.body.continue:
+// OMP-NEXT:    br label [[OMP_INNER_FOR_INC31:%.*]]
+// OMP:       omp.inner.for.inc31:
+// OMP-NEXT:    [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP38]], 1
+// OMP-NEXT:    store i32 [[ADD32]], i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND26]]
+// OMP:       omp.inner.for.end33:
+// OMP-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// OMP:       omp.loop.exit:
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// OMP-NEXT:    br label [[OMP_PRECOND_END]]
+// OMP:       cancel.exit34:
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
+// OMP-NEXT:    br label [[CANCEL_CONT35:%.*]]
+// OMP:       omp.precond.end:
+// OMP-NEXT:    br label [[CANCEL_CONT35]]
+// OMP:       cancel.cont35:
+// OMP-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]])
+// OMP-NEXT:    [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// OMP-NEXT:    [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates*
+// OMP-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0
+// OMP-NEXT:    [[TMP42:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP39]])
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    store i32 0, i32* [[R]], align 4
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
+// OMP-NEXT:    [[TMP43:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP-NEXT:    ret i32 [[TMP43]]
+//
+//
+// OMP-LABEL: @.omp_outlined.(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8***, align 8
+// OMP-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    store i8*** [[ARGV:%.*]], i8**** [[ARGV_ADDR]], align 8
+// OMP-NEXT:    store i32* [[ARGC:%.*]], i32** [[ARGC_ADDR]], align 8
+// OMP-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8
+// OMP-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
+// OMP-NEXT:    [[TMP2:%.*]] = load float, float* @flag, align 4
+// OMP-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
+// OMP-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// OMP:       omp_if.then:
+// OMP-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
+// OMP-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1)
+// OMP-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
+// OMP-NEXT:    br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP:       .cancel.exit:
+// OMP-NEXT:    br label [[RETURN:%.*]]
+// OMP:       .cancel.continue:
+// OMP-NEXT:    br label [[OMP_IF_END:%.*]]
+// OMP:       omp_if.else:
+// OMP-NEXT:    br label [[OMP_IF_END]]
+// OMP:       omp_if.end:
+// OMP-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4
+// OMP-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP7]] to i8
+// OMP-NEXT:    [[TMP8:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// OMP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 0
+// OMP-NEXT:    [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// OMP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 0
+// OMP-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
+// OMP-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
+// OMP-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]])
+// OMP-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
+// OMP-NEXT:    br i1 [[TMP13]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
+// OMP:       .cancel.exit2:
+// OMP-NEXT:    br label [[RETURN]]
+// OMP:       .cancel.continue3:
+// OMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP1]], align 4
+// OMP-NEXT:    [[TMP15:%.*]] = load i8**, i8*** [[TMP0]], align 8
+// OMP-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP15]], i64 0
+// OMP-NEXT:    [[TMP16:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8
+// OMP-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP16]], i64 0
+// OMP-NEXT:    [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1
+// OMP-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP17]] to i32
+// OMP-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP14]]
+// OMP-NEXT:    [[CONV7:%.*]] = trunc i32 [[ADD]] to i8
+// OMP-NEXT:    store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1
+// OMP-NEXT:    br label [[RETURN]]
+// OMP:       return:
+// OMP-NEXT:    ret void
+//
+//
+// OMP-LABEL: @.omp_task_entry.(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
+// OMP-NEXT:    [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
+// OMP-NEXT:    [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
+// OMP-NEXT:    [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
+// OMP-NEXT:    [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// OMP-NEXT:    store i32 [[TMP0:%.*]], i32* [[DOTADDR]], align 4
+// OMP-NEXT:    store %struct.kmp_task_t_with_privates* [[TMP1:%.*]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// OMP-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// OMP-NEXT:    [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// OMP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
+// OMP-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
+// OMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
+// OMP-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// OMP-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
+// OMP-NEXT:    [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
+// OMP-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]])
+// OMP-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]])
+// OMP-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
+// OMP-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]])
+// OMP-NEXT:    store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13
+// OMP-NEXT:    store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13
+// OMP-NEXT:    store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13
+// OMP-NEXT:    store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13
+// OMP-NEXT:    store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13
+// OMP-NEXT:    store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13
+// OMP-NEXT:    [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13
+// OMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13
+// OMP-NEXT:    [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]]
+// OMP-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
+// OMP-NEXT:    br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
+// OMP:       .cancel.exit.i:
+// OMP-NEXT:    store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
+// OMP:       .cancel.continue.i:
+// OMP-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP-NEXT:    br label [[DOTOMP_OUTLINED__1_EXIT]]
+// OMP:       .omp_outlined..1.exit:
+// OMP-NEXT:    [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP-NEXT:    ret i32 0
+//
+//
+// OMP-LABEL: @.omp_outlined..2(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
+// OMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// OMP-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// OMP-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
+// OMP-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0
+// OMP-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP:       omp.inner.for.cond:
+// OMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
+// OMP-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP:       omp.inner.for.body:
+// OMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
+// OMP-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
+// OMP-NEXT:    ]
+// OMP:       .omp.sections.case:
+// OMP-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
+// OMP-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+// OMP-NEXT:    br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP:       .cancel.exit:
+// OMP-NEXT:    br label [[CANCEL_EXIT:%.*]]
+// OMP:       .cancel.continue:
+// OMP-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP:       .omp.sections.exit:
+// OMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP:       omp.inner.for.inc:
+// OMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
+// OMP-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// OMP:       omp.inner.for.end:
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// OMP-NEXT:    br label [[CANCEL_CONT:%.*]]
+// OMP:       cancel.cont:
+// OMP-NEXT:    ret void
+// OMP:       cancel.exit:
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// OMP-NEXT:    br label [[CANCEL_CONT]]
+//
+//
+// OMP-LABEL: @.omp_outlined..3(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
+// OMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// OMP-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// OMP-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
+// OMP-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
+// OMP-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP:       omp.inner.for.cond:
+// OMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
+// OMP-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP:       omp.inner.for.body:
+// OMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
+// OMP-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
+// OMP-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]]
+// OMP-NEXT:    ]
+// OMP:       .omp.sections.case:
+// OMP-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
+// OMP-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+// OMP-NEXT:    br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP:       .cancel.exit:
+// OMP-NEXT:    br label [[CANCEL_EXIT:%.*]]
+// OMP:       .cancel.continue:
+// OMP-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP:       .omp.sections.case1:
+// OMP-NEXT:    [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3)
+// OMP-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+// OMP-NEXT:    br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]]
+// OMP:       .cancel.exit2:
+// OMP-NEXT:    br label [[CANCEL_EXIT]]
+// OMP:       .cancel.continue3:
+// OMP-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP:       .omp.sections.exit:
+// OMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP:       omp.inner.for.inc:
+// OMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
+// OMP-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// OMP:       omp.inner.for.end:
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// OMP-NEXT:    br label [[CANCEL_CONT:%.*]]
+// OMP:       cancel.cont:
+// OMP-NEXT:    ret void
+// OMP:       cancel.exit:
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]])
+// OMP-NEXT:    br label [[CANCEL_CONT]]
+//
+//
+// OMP-LABEL: @.omp_outlined..4(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[R_ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[R3:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[ARGC:%.*]], i32** [[ARGC_ADDR]], align 8
+// OMP-NEXT:    store i32* [[R:%.*]], i32** [[R_ADDR]], align 8
+// OMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
+// OMP-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
+// OMP-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
+// OMP-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
+// OMP-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// OMP-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// OMP-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP-NEXT:    store i32 0, i32* [[I]], align 4
+// OMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
+// OMP-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// OMP:       omp.precond.then:
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// OMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// OMP-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// OMP-NEXT:    store i32 0, i32* [[R3]], align 4
+// OMP-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
+// OMP-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// OMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
+// OMP-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// OMP:       cond.true:
+// OMP-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP-NEXT:    br label [[COND_END:%.*]]
+// OMP:       cond.false:
+// OMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT:    br label [[COND_END]]
+// OMP:       cond.end:
+// OMP-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
+// OMP-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// OMP-NEXT:    store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP:       omp.inner.for.cond:
+// OMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// OMP-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP:       omp.inner.for.body:
+// OMP-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1
+// OMP-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// OMP-NEXT:    store i32 [[ADD]], i32* [[I4]], align 4
+// OMP-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
+// OMP-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 2)
+// OMP-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// OMP-NEXT:    br i1 [[TMP19]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP:       .cancel.exit:
+// OMP-NEXT:    br label [[CANCEL_EXIT:%.*]]
+// OMP:       .cancel.continue:
+// OMP-NEXT:    [[TMP20:%.*]] = load i32, i32* [[I4]], align 4
+// OMP-NEXT:    [[TMP21:%.*]] = load i32, i32* [[R3]], align 4
+// OMP-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP20]]
+// OMP-NEXT:    store i32 [[ADD7]], i32* [[R3]], align 4
+// OMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// OMP:       omp.body.continue:
+// OMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP:       omp.inner.for.inc:
+// OMP-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1
+// OMP-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// OMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// OMP:       omp.inner.for.end:
+// OMP-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// OMP:       omp.loop.exit:
+// OMP-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP24]])
+// OMP-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// OMP-NEXT:    [[TMP26:%.*]] = bitcast i32* [[R3]] to i8*
+// OMP-NEXT:    store i8* [[TMP26]], i8** [[TMP25]], align 8
+// OMP-NEXT:    [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4
+// OMP-NEXT:    [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// OMP-NEXT:    [[TMP30:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// OMP-NEXT:    switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// OMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// OMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// OMP-NEXT:    ]
+// OMP:       .omp.reduction.case1:
+// OMP-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP1]], align 4
+// OMP-NEXT:    [[TMP32:%.*]] = load i32, i32* [[R3]], align 4
+// OMP-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
+// OMP-NEXT:    store i32 [[ADD9]], i32* [[TMP1]], align 4
+// OMP-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB7]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// OMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// OMP:       cancel.exit:
+// OMP-NEXT:    [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
+// OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP34]])
+// OMP-NEXT:    br label [[CANCEL_CONT:%.*]]
+// OMP:       .omp.reduction.case2:
+// OMP-NEXT:    [[TMP35:%.*]] = load i32, i32* [[R3]], align 4
+// OMP-NEXT:    [[TMP36:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP35]] monotonic, align 4
+// OMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// OMP:       .omp.reduction.default:
+// OMP-NEXT:    br label [[OMP_PRECOND_END]]
+// OMP:       omp.precond.end:
+// OMP-NEXT:    br label [[CANCEL_CONT]]
+// OMP:       cancel.cont:
+// OMP-NEXT:    ret void
+//
+//
+// OMP-LABEL: @.omp.reduction.reduction_func(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// OMP-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// OMP-NEXT:    store i8* [[TMP0:%.*]], i8** [[DOTADDR]], align 8
+// OMP-NEXT:    store i8* [[TMP1:%.*]], i8** [[DOTADDR1]], align 8
+// OMP-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// OMP-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// OMP-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// OMP-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// OMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// OMP-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// OMP-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
+// OMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// OMP-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// OMP-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+// OMP-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
+// OMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
+// OMP-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// OMP-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
+// OMP-NEXT:    ret void
+//
+//
+// OMP_IRB-LABEL: @main(
+// OMP_IRB-NEXT:  entry:
+// OMP_IRB-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
+// OMP_IRB-NEXT:    [[P_LASTITER:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[P_STRIDE:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[P_LASTITER26:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[P_LOWERBOUND27:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[P_UPPERBOUND28:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[P_STRIDE29:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTCAPTURE_EXPR_33:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[I:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[I35:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// OMP_IRB-NEXT:    [[R:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// OMP_IRB-NEXT:    store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]], align 4
+// OMP_IRB-NEXT:    store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]], align 8
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// OMP_IRB-NEXT:    br label [[OMP_PARALLEL:%.*]]
+// OMP_IRB:       omp_parallel:
+// OMP_IRB-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
+// OMP_IRB-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
+// OMP_IRB:       omp.par.outlined.exit:
+// OMP_IRB-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
+// OMP_IRB:       omp.par.exit.split:
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER:%.*]]
+// OMP_IRB:       omp_section_loop.preheader:
+// OMP_IRB-NEXT:    store i32 0, i32* [[P_LOWERBOUND]], align 4
+// OMP_IRB-NEXT:    store i32 0, i32* [[P_UPPERBOUND]], align 4
+// OMP_IRB-NEXT:    store i32 1, i32* [[P_STRIDE]], align 4
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 1)
+// OMP_IRB-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4
+// OMP_IRB-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4
+// OMP_IRB-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]]
+// OMP_IRB-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_HEADER:%.*]]
+// OMP_IRB:       omp_section_loop.header:
+// OMP_IRB-NEXT:    [[OMP_SECTION_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER]] ], [ [[OMP_SECTION_LOOP_NEXT:%.*]], [[OMP_SECTION_LOOP_INC:%.*]] ]
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_COND:%.*]]
+// OMP_IRB:       omp_section_loop.cond:
+// OMP_IRB-NEXT:    [[OMP_SECTION_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV]], [[TMP3]]
+// OMP_IRB-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP]], label [[OMP_SECTION_LOOP_BODY:%.*]], label [[OMP_SECTION_LOOP_EXIT:%.*]]
+// OMP_IRB:       omp_section_loop.body:
+// OMP_IRB-NEXT:    [[TMP4:%.*]] = add i32 [[OMP_SECTION_LOOP_IV]], [[TMP0]]
+// OMP_IRB-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], 1
+// OMP_IRB-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 0
+// OMP_IRB-NEXT:    switch i32 [[TMP6]], label [[OMP_SECTION_LOOP_INC]] [
+// OMP_IRB-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]]
+// OMP_IRB-NEXT:    ]
+// OMP_IRB:       omp_section_loop.inc:
+// OMP_IRB-NEXT:    [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_HEADER]]
+// OMP_IRB:       omp_section_loop.exit:
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_AFTER:%.*]]
+// OMP_IRB:       omp_section_loop.after:
+// OMP_IRB-NEXT:    br label [[OMP_SECTIONS_END:%.*]]
+// OMP_IRB:       omp_sections.end:
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER12:%.*]]
+// OMP_IRB:       omp_section_loop.preheader12:
+// OMP_IRB-NEXT:    store i32 0, i32* [[P_LOWERBOUND27]], align 4
+// OMP_IRB-NEXT:    store i32 1, i32* [[P_UPPERBOUND28]], align 4
+// OMP_IRB-NEXT:    store i32 1, i32* [[P_STRIDE29]], align 4
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM30:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]], i32 34, i32* [[P_LASTITER26]], i32* [[P_LOWERBOUND27]], i32* [[P_UPPERBOUND28]], i32* [[P_STRIDE29]], i32 1, i32 1)
+// OMP_IRB-NEXT:    [[TMP7:%.*]] = load i32, i32* [[P_LOWERBOUND27]], align 4
+// OMP_IRB-NEXT:    [[TMP8:%.*]] = load i32, i32* [[P_UPPERBOUND28]], align 4
+// OMP_IRB-NEXT:    [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
+// OMP_IRB-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 1
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_HEADER13:%.*]]
+// OMP_IRB:       omp_section_loop.header13:
+// OMP_IRB-NEXT:    [[OMP_SECTION_LOOP_IV19:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER12]] ], [ [[OMP_SECTION_LOOP_NEXT21:%.*]], [[OMP_SECTION_LOOP_INC16:%.*]] ]
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_COND14:%.*]]
+// OMP_IRB:       omp_section_loop.cond14:
+// OMP_IRB-NEXT:    [[OMP_SECTION_LOOP_CMP20:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV19]], [[TMP10]]
+// OMP_IRB-NEXT:    br i1 [[OMP_SECTION_LOOP_CMP20]], label [[OMP_SECTION_LOOP_BODY15:%.*]], label [[OMP_SECTION_LOOP_EXIT17:%.*]]
+// OMP_IRB:       omp_section_loop.body15:
+// OMP_IRB-NEXT:    [[TMP11:%.*]] = add i32 [[OMP_SECTION_LOOP_IV19]], [[TMP7]]
+// OMP_IRB-NEXT:    [[TMP12:%.*]] = mul i32 [[TMP11]], 1
+// OMP_IRB-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], 0
+// OMP_IRB-NEXT:    switch i32 [[TMP13]], label [[OMP_SECTION_LOOP_INC16]] [
+// OMP_IRB-NEXT:    i32 0, label [[OMP_SECTION_LOOP_BODY_CASE22:%.*]]
+// OMP_IRB-NEXT:    i32 1, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]]
+// OMP_IRB-NEXT:    ]
+// OMP_IRB:       omp_section_loop.inc16:
+// OMP_IRB-NEXT:    [[OMP_SECTION_LOOP_NEXT21]] = add nuw i32 [[OMP_SECTION_LOOP_IV19]], 1
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_HEADER13]]
+// OMP_IRB:       omp_section_loop.exit17:
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM30]])
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM31:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM31]])
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_AFTER18:%.*]]
+// OMP_IRB:       omp_section_loop.after18:
+// OMP_IRB-NEXT:    br label [[OMP_SECTIONS_END32:%.*]]
+// OMP_IRB:       omp_sections.end32:
+// OMP_IRB-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_IRB-NEXT:    store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0
+// OMP_IRB-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// OMP_IRB-NEXT:    [[SUB34:%.*]] = sub nsw i32 [[DIV]], 1
+// OMP_IRB-NEXT:    store i32 [[SUB34]], i32* [[DOTCAPTURE_EXPR_33]], align 4
+// OMP_IRB-NEXT:    store i32 0, i32* [[I]], align 4
+// OMP_IRB-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP16]]
+// OMP_IRB-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// OMP_IRB:       omp.precond.then:
+// OMP_IRB-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// OMP_IRB-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// OMP_IRB-NEXT:    store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// OMP_IRB-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM36:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM36]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// OMP_IRB-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// OMP_IRB-NEXT:    [[CMP37:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
+// OMP_IRB-NEXT:    br i1 [[CMP37]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// OMP_IRB:       cond.true:
+// OMP_IRB-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_33]], align 4
+// OMP_IRB-NEXT:    br label [[COND_END:%.*]]
+// OMP_IRB:       cond.false:
+// OMP_IRB-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT:    br label [[COND_END]]
+// OMP_IRB:       cond.end:
+// OMP_IRB-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ]
+// OMP_IRB-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// OMP_IRB-NEXT:    store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP_IRB:       omp.inner.for.cond:
+// OMP_IRB-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT:    [[CMP38:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
+// OMP_IRB-NEXT:    br i1 [[CMP38]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP_IRB:       omp.inner.for.body:
+// OMP_IRB-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
+// OMP_IRB-NEXT:    [[ADD39:%.*]] = add nsw i32 0, [[MUL]]
+// OMP_IRB-NEXT:    store i32 [[ADD39]], i32* [[I35]], align 4
+// OMP_IRB-NEXT:    [[TMP26:%.*]] = load float, float* @flag, align 4
+// OMP_IRB-NEXT:    [[TOBOOL40:%.*]] = fcmp une float [[TMP26]], 0.000000e+00
+// OMP_IRB-NEXT:    br i1 [[TOBOOL40]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// OMP_IRB:       omp_if.then:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
+// OMP_IRB-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]], i32 2)
+// OMP_IRB-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP28]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP_IRB:       .cancel.exit:
+// OMP_IRB-NEXT:    br label [[CANCEL_EXIT:%.*]]
+// OMP_IRB:       omp_section_loop.body.case:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    [[TMP29:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9]], i32 3)
+// OMP_IRB-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[TMP29]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP30]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]]
+// OMP_IRB:       omp_section_loop.body.case.split:
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
+// OMP_IRB:       omp_section_loop.body.case.cncl:
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
+// OMP_IRB:       omp_section_loop.body.case22:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM23:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]], i32 3)
+// OMP_IRB-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP32]], label [[OMP_SECTION_LOOP_BODY_CASE22_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE22_CNCL:%.*]]
+// OMP_IRB:       omp_section_loop.body.case22.split:
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
+// OMP_IRB:       omp_section_loop.body.case22.cncl:
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
+// OMP_IRB:       omp_section_loop.body.case24:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3)
+// OMP_IRB-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP34]], label [[OMP_SECTION_LOOP_BODY_CASE24_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]]
+// OMP_IRB:       omp_section_loop.body.case24.split:
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
+// OMP_IRB:       omp_section_loop.body.case24.cncl:
+// OMP_IRB-NEXT:    br label [[OMP_SECTION_LOOP_EXIT17]]
+// OMP_IRB:       .cancel.continue:
+// OMP_IRB-NEXT:    br label [[OMP_IF_END:%.*]]
+// OMP_IRB:       omp_if.else:
+// OMP_IRB-NEXT:    br label [[OMP_IF_END]]
+// OMP_IRB:       omp_if.end:
+// OMP_IRB-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// OMP_IRB:       omp.body.continue:
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP_IRB:       omp.inner.for.inc:
+// OMP_IRB-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP35]], 1
+// OMP_IRB-NEXT:    store i32 [[ADD42]], i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// OMP_IRB:       omp.inner.for.end:
+// OMP_IRB-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// OMP_IRB:       omp.loop.exit:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]])
+// OMP_IRB-NEXT:    br label [[OMP_PRECOND_END]]
+// OMP_IRB:       cancel.exit:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM43:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM43]])
+// OMP_IRB-NEXT:    br label [[CANCEL_CONT:%.*]]
+// OMP_IRB:       omp.precond.end:
+// OMP_IRB-NEXT:    br label [[CANCEL_CONT]]
+// OMP_IRB:       cancel.cont:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM45]])
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]])
+// OMP_IRB-NEXT:    [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM46]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// OMP_IRB-NEXT:    [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates*
+// OMP_IRB-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]])
+// OMP_IRB-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i8* [[TMP36]])
+// OMP_IRB-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+// OMP_IRB-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
+// OMP_IRB-NEXT:    store i32 0, i32* [[R]], align 4
+// OMP_IRB-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]])
+// OMP_IRB-NEXT:    [[TMP40:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_IRB-NEXT:    ret i32 [[TMP40]]
+//
+//
+// OMP_IRB-LABEL: @main..omp_par(
+// OMP_IRB-NEXT:  omp.par.entry:
+// OMP_IRB-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR:%.*]], align 4
+// OMP_IRB-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+// OMP_IRB-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+// OMP_IRB-NEXT:    br label [[OMP_PAR_REGION:%.*]]
+// OMP_IRB:       omp.par.outlined.exit.exitStub:
+// OMP_IRB-NEXT:    ret void
+// OMP_IRB:       omp.par.region:
+// OMP_IRB-NEXT:    [[TMP1:%.*]] = load float, float* @flag, align 4
+// OMP_IRB-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
+// OMP_IRB-NEXT:    br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]]
+// OMP_IRB:       2:
+// OMP_IRB-NEXT:    br label [[TMP3:%.*]]
+// OMP_IRB:       3:
+// OMP_IRB-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR:%.*]], align 4
+// OMP_IRB-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP4]] to i8
+// OMP_IRB-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR:%.*]], align 8
+// OMP_IRB-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
+// OMP_IRB-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// OMP_IRB-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
+// OMP_IRB-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX2]], align 1
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+// OMP_IRB-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL4:%.*]]
+// OMP_IRB:       .cncl4:
+// OMP_IRB-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+// OMP_IRB:       .cont:
+// OMP_IRB-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_IRB-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
+// OMP_IRB-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
+// OMP_IRB-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8
+// OMP_IRB-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
+// OMP_IRB-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1
+// OMP_IRB-NEXT:    [[CONV7:%.*]] = sext i8 [[TMP12]] to i32
+// OMP_IRB-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV7]], [[TMP9]]
+// OMP_IRB-NEXT:    [[CONV8:%.*]] = trunc i32 [[ADD]] to i8
+// OMP_IRB-NEXT:    store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1
+// OMP_IRB-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
+// OMP_IRB:       omp.par.pre_finalize:
+// OMP_IRB-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
+// OMP_IRB:       13:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
+// OMP_IRB-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
+// OMP_IRB:       .cncl:
+// OMP_IRB-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
+// OMP_IRB:       .split:
+// OMP_IRB-NEXT:    br label [[TMP3]]
+//
+//
+// OMP_IRB-LABEL: @.omp_task_entry.(
+// OMP_IRB-NEXT:  entry:
+// OMP_IRB-NEXT:    [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT:    [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
+// OMP_IRB-NEXT:    [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
+// OMP_IRB-NEXT:    [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
+// OMP_IRB-NEXT:    [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
+// OMP_IRB-NEXT:    [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// OMP_IRB-NEXT:    store i32 [[TMP0:%.*]], i32* [[DOTADDR]], align 4
+// OMP_IRB-NEXT:    store %struct.kmp_task_t_with_privates* [[TMP1:%.*]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// OMP_IRB-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// OMP_IRB-NEXT:    [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// OMP_IRB-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
+// OMP_IRB-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
+// OMP_IRB-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
+// OMP_IRB-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// OMP_IRB-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
+// OMP_IRB-NEXT:    [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
+// OMP_IRB-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]])
+// OMP_IRB-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]])
+// OMP_IRB-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
+// OMP_IRB-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]])
+// OMP_IRB-NEXT:    store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13
+// OMP_IRB-NEXT:    store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT:    store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT:    store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT:    store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT:    store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT:    [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) #[[ATTR2:[0-9]+]]
+// OMP_IRB-NEXT:    [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) #[[ATTR2]]
+// OMP_IRB-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]]
+// OMP_IRB:       .cancel.exit.i:
+// OMP_IRB-NEXT:    store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP_IRB-NEXT:    br label [[DOTOMP_OUTLINED__EXIT:%.*]]
+// OMP_IRB:       .cancel.continue.i:
+// OMP_IRB-NEXT:    store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP_IRB-NEXT:    br label [[DOTOMP_OUTLINED__EXIT]]
+// OMP_IRB:       .omp_outlined..exit:
+// OMP_IRB-NEXT:    [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13
+// OMP_IRB-NEXT:    ret i32 0
+//
+//
+// OMP_IRB-LABEL: @.omp_outlined..1(
+// OMP_IRB-NEXT:  entry:
+// OMP_IRB-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP_IRB-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP_IRB-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP_IRB-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
+// OMP_IRB-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB17:[0-9]+]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// OMP_IRB-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
+// OMP_IRB-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
+// OMP_IRB-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP_IRB-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP_IRB:       omp.inner.for.cond:
+// OMP_IRB-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
+// OMP_IRB-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP_IRB:       omp.inner.for.body:
+// OMP_IRB-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT:    switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
+// OMP_IRB-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
+// OMP_IRB-NEXT:    ]
+// OMP_IRB:       .omp.sections.case:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
+// OMP_IRB-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
+// OMP_IRB:       .omp.sections.case.split:
+// OMP_IRB-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP_IRB:       .omp.sections.case.cncl:
+// OMP_IRB-NEXT:    br label [[CANCEL_CONT:%.*]]
+// OMP_IRB:       .omp.sections.exit:
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP_IRB:       omp.inner.for.inc:
+// OMP_IRB-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP9]], 1
+// OMP_IRB-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// OMP_IRB:       omp.inner.for.end:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+// OMP_IRB-NEXT:    br label [[CANCEL_CONT]]
+// OMP_IRB:       cancel.cont:
+// OMP_IRB-NEXT:    ret void
+// OMP_IRB:       cancel.exit:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// OMP_IRB-NEXT:    br label [[CANCEL_CONT]]
+//
+//
+// OMP_IRB-LABEL: @.omp_outlined..2(
+// OMP_IRB-NEXT:  entry:
+// OMP_IRB-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT:    [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP_IRB-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP_IRB-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP_IRB-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT:    store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4
+// OMP_IRB-NEXT:    store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// OMP_IRB-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1
+// OMP_IRB-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 1
+// OMP_IRB-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4
+// OMP_IRB-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP_IRB:       omp.inner.for.cond:
+// OMP_IRB-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4
+// OMP_IRB-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
+// OMP_IRB-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP_IRB:       omp.inner.for.body:
+// OMP_IRB-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT:    switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [
+// OMP_IRB-NEXT:    i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]]
+// OMP_IRB-NEXT:    i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]]
+// OMP_IRB-NEXT:    ]
+// OMP_IRB:       .omp.sections.case:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3)
+// OMP_IRB-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]]
+// OMP_IRB:       .omp.sections.case.split:
+// OMP_IRB-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP_IRB:       .omp.sections.case.cncl:
+// OMP_IRB-NEXT:    br label [[CANCEL_CONT:%.*]]
+// OMP_IRB:       .omp.sections.case2:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// OMP_IRB-NEXT:    [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3)
+// OMP_IRB-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]]
+// OMP_IRB:       .omp.sections.case2.split:
+// OMP_IRB-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
+// OMP_IRB:       .omp.sections.case2.cncl:
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_END]]
+// OMP_IRB:       .omp.sections.exit:
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP_IRB:       omp.inner.for.inc:
+// OMP_IRB-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
+// OMP_IRB-NEXT:    store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// OMP_IRB:       omp.inner.for.end:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]])
+// OMP_IRB-NEXT:    br label [[CANCEL_CONT]]
+// OMP_IRB:       cancel.cont:
+// OMP_IRB-NEXT:    ret void
+// OMP_IRB:       cancel.exit:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+// OMP_IRB-NEXT:    br label [[CANCEL_CONT]]
+//
+//
+// OMP_IRB-LABEL: @.omp_outlined..3(
+// OMP_IRB-NEXT:  entry:
+// OMP_IRB-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT:    [[R_ADDR:%.*]] = alloca i32*, align 8
+// OMP_IRB-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[I:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[R3:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// OMP_IRB-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// OMP_IRB-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP_IRB-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP_IRB-NEXT:    store i32* [[ARGC:%.*]], i32** [[ARGC_ADDR]], align 8
+// OMP_IRB-NEXT:    store i32* [[R:%.*]], i32** [[R_ADDR]], align 8
+// OMP_IRB-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
+// OMP_IRB-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8
+// OMP_IRB-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
+// OMP_IRB-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
+// OMP_IRB-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// OMP_IRB-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// OMP_IRB-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP_IRB-NEXT:    store i32 0, i32* [[I]], align 4
+// OMP_IRB-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// OMP_IRB-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
+// OMP_IRB-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// OMP_IRB:       omp.precond.then:
+// OMP_IRB-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// OMP_IRB-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP_IRB-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// OMP_IRB-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// OMP_IRB-NEXT:    store i32 0, i32* [[R3]], align 4
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// OMP_IRB-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP_IRB-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
+// OMP_IRB-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// OMP_IRB:       cond.true:
+// OMP_IRB-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// OMP_IRB-NEXT:    br label [[COND_END:%.*]]
+// OMP_IRB:       cond.false:
+// OMP_IRB-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT:    br label [[COND_END]]
+// OMP_IRB:       cond.end:
+// OMP_IRB-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
+// OMP_IRB-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// OMP_IRB-NEXT:    store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// OMP_IRB:       omp.inner.for.cond:
+// OMP_IRB-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// OMP_IRB-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
+// OMP_IRB-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// OMP_IRB:       omp.inner.for.body:
+// OMP_IRB-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
+// OMP_IRB-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// OMP_IRB-NEXT:    store i32 [[ADD]], i32* [[I4]], align 4
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]])
+// OMP_IRB-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2)
+// OMP_IRB-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// OMP_IRB-NEXT:    br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]]
+// OMP_IRB:       .cancel.exit:
+// OMP_IRB-NEXT:    br label [[CANCEL_EXIT:%.*]]
+// OMP_IRB:       .cancel.continue:
+// OMP_IRB-NEXT:    [[TMP16:%.*]] = load i32, i32* [[I4]], align 4
+// OMP_IRB-NEXT:    [[TMP17:%.*]] = load i32, i32* [[R3]], align 4
+// OMP_IRB-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+// OMP_IRB-NEXT:    store i32 [[ADD8]], i32* [[R3]], align 4
+// OMP_IRB-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// OMP_IRB:       omp.body.continue:
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// OMP_IRB:       omp.inner.for.inc:
+// OMP_IRB-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1
+// OMP_IRB-NEXT:    store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4
+// OMP_IRB-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// OMP_IRB:       omp.inner.for.end:
+// OMP_IRB-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// OMP_IRB:       omp.loop.exit:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// OMP_IRB-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// OMP_IRB-NEXT:    [[TMP20:%.*]] = bitcast i32* [[R3]] to i8*
+// OMP_IRB-NEXT:    store i8* [[TMP20]], i8** [[TMP19]], align 8
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
+// OMP_IRB-NEXT:    [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// OMP_IRB-NEXT:    [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// OMP_IRB-NEXT:    switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// OMP_IRB-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// OMP_IRB-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// OMP_IRB-NEXT:    ]
+// OMP_IRB:       .omp.reduction.case1:
+// OMP_IRB-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP1]], align 4
+// OMP_IRB-NEXT:    [[TMP24:%.*]] = load i32, i32* [[R3]], align 4
+// OMP_IRB-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
+// OMP_IRB-NEXT:    store i32 [[ADD13]], i32* [[TMP1]], align 4
+// OMP_IRB-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// OMP_IRB-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// OMP_IRB:       cancel.exit:
+// OMP_IRB-NEXT:    [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]])
+// OMP_IRB-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]])
+// OMP_IRB-NEXT:    br label [[CANCEL_CONT:%.*]]
+// OMP_IRB:       .omp.reduction.case2:
+// OMP_IRB-NEXT:    [[TMP25:%.*]] = load i32, i32* [[R3]], align 4
+// OMP_IRB-NEXT:    [[TMP26:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP25]] monotonic, align 4
+// OMP_IRB-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// OMP_IRB:       .omp.reduction.default:
+// OMP_IRB-NEXT:    br label [[OMP_PRECOND_END]]
+// OMP_IRB:       omp.precond.end:
+// OMP_IRB-NEXT:    br label [[CANCEL_CONT]]
+// OMP_IRB:       cancel.cont:
+// OMP_IRB-NEXT:    ret void
+//
+//
+// OMP_IRB-LABEL: @.omp.reduction.reduction_func(
+// OMP_IRB-NEXT:  entry:
+// OMP_IRB-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// OMP_IRB-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// OMP_IRB-NEXT:    store i8* [[TMP0:%.*]], i8** [[DOTADDR]], align 8
+// OMP_IRB-NEXT:    store i8* [[TMP1:%.*]], i8** [[DOTADDR1]], align 8
+// OMP_IRB-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// OMP_IRB-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// OMP_IRB-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// OMP_IRB-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// OMP_IRB-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// OMP_IRB-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// OMP_IRB-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
+// OMP_IRB-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// OMP_IRB-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// OMP_IRB-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+// OMP_IRB-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
+// OMP_IRB-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
+// OMP_IRB-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// OMP_IRB-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
+// OMP_IRB-NEXT:    ret void
+//
+//
+// OMP_SIMD-LABEL: @main(
+// OMP_SIMD-NEXT:  entry:
+// OMP_SIMD-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// OMP_SIMD-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// OMP_SIMD-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
+// OMP_SIMD-NEXT:    [[I:%.*]] = alloca i32, align 4
+// OMP_SIMD-NEXT:    [[R:%.*]] = alloca i32, align 4
+// OMP_SIMD-NEXT:    [[I6:%.*]] = alloca i32, align 4
+// OMP_SIMD-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// OMP_SIMD-NEXT:    store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT:    store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]], align 8
+// OMP_SIMD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP0]] to i8
+// OMP_SIMD-NEXT:    [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
+// OMP_SIMD-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP1]], i64 0
+// OMP_SIMD-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// OMP_SIMD-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0
+// OMP_SIMD-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1
+// OMP_SIMD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT:    [[TMP4:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
+// OMP_SIMD-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0
+// OMP_SIMD-NEXT:    [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8
+// OMP_SIMD-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0
+// OMP_SIMD-NEXT:    [[TMP6:%.*]] = load i8, i8* [[ARRAYIDX3]], align 1
+// OMP_SIMD-NEXT:    [[CONV4:%.*]] = sext i8 [[TMP6]] to i32
+// OMP_SIMD-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV4]], [[TMP3]]
+// OMP_SIMD-NEXT:    [[CONV5:%.*]] = trunc i32 [[ADD]] to i8
+// OMP_SIMD-NEXT:    store i8 [[CONV5]], i8* [[ARRAYIDX3]], align 1
+// OMP_SIMD-NEXT:    store i32 0, i32* [[I]], align 4
+// OMP_SIMD-NEXT:    br label [[FOR_COND:%.*]]
+// OMP_SIMD:       for.cond:
+// OMP_SIMD-NEXT:    [[TMP7:%.*]] = load i32, i32* [[I]], align 4
+// OMP_SIMD-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]]
+// OMP_SIMD-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// OMP_SIMD:       for.body:
+// OMP_SIMD-NEXT:    br label [[FOR_INC:%.*]]
+// OMP_SIMD:       for.inc:
+// OMP_SIMD-NEXT:    [[TMP9:%.*]] = load i32, i32* [[I]], align 4
+// OMP_SIMD-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP9]], 1
+// OMP_SIMD-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+// OMP_SIMD-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
+// OMP_SIMD:       for.end:
+// OMP_SIMD-NEXT:    store i32 0, i32* [[R]], align 4
+// OMP_SIMD-NEXT:    store i32 0, i32* [[I6]], align 4
+// OMP_SIMD-NEXT:    br label [[FOR_COND7:%.*]]
+// OMP_SIMD:       for.cond7:
+// OMP_SIMD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[I6]], align 4
+// OMP_SIMD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// OMP_SIMD-NEXT:    br i1 [[CMP8]], label [[FOR_BODY9:%.*]], label [[FOR_END13:%.*]]
+// OMP_SIMD:       for.body9:
+// OMP_SIMD-NEXT:    [[TMP12:%.*]] = load i32, i32* [[I6]], align 4
+// OMP_SIMD-NEXT:    [[TMP13:%.*]] = load i32, i32* [[R]], align 4
+// OMP_SIMD-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+// OMP_SIMD-NEXT:    store i32 [[ADD10]], i32* [[R]], align 4
+// OMP_SIMD-NEXT:    br label [[FOR_INC11:%.*]]
+// OMP_SIMD:       for.inc11:
+// OMP_SIMD-NEXT:    [[TMP14:%.*]] = load i32, i32* [[I6]], align 4
+// OMP_SIMD-NEXT:    [[INC12:%.*]] = add nsw i32 [[TMP14]], 1
+// OMP_SIMD-NEXT:    store i32 [[INC12]], i32* [[I6]], align 4
+// OMP_SIMD-NEXT:    br label [[FOR_COND7]], !llvm.loop [[LOOP4:![0-9]+]]
+// OMP_SIMD:       for.end13:
+// OMP_SIMD-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// OMP_SIMD-NEXT:    ret i32 [[TMP15]]
+//


        


More information about the cfe-commits mailing list