[clang] 1fff116 - [OpenMP] Change OpenMP code generation for target region entries

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Fri Jul 8 11:44:50 PDT 2022


Author: Joseph Huber
Date: 2022-07-08T14:44:11-04:00
New Revision: 1fff116645b3ee108e8d8edbd5b90bed8304a4a8

URL: https://github.com/llvm/llvm-project/commit/1fff116645b3ee108e8d8edbd5b90bed8304a4a8
DIFF: https://github.com/llvm/llvm-project/commit/1fff116645b3ee108e8d8edbd5b90bed8304a4a8.diff

LOG: [OpenMP] Change OpenMP code generation for target region entries

This patch changes the code we generate to enter a target region on the
device. This is in-line with the new definition in the runtime that was
added previously. Additionally we implement this in the OpenMPIRBuilder
so that this code can be shared with Flang in the future.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D128550

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/test/OpenMP/capturing_in_templates.cpp
    clang/test/OpenMP/declare_mapper_codegen.cpp
    clang/test/OpenMP/declare_target_link_codegen.cpp
    clang/test/OpenMP/distribute_codegen.cpp
    clang/test/OpenMP/distribute_firstprivate_codegen.cpp
    clang/test/OpenMP/distribute_lastprivate_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
    clang/test/OpenMP/distribute_private_codegen.cpp
    clang/test/OpenMP/distribute_simd_codegen.cpp
    clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/distribute_simd_private_codegen.cpp
    clang/test/OpenMP/distribute_simd_reduction_codegen.cpp
    clang/test/OpenMP/nvptx_lambda_capturing.cpp
    clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp
    clang/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp
    clang/test/OpenMP/openmp_offload_codegen.cpp
    clang/test/OpenMP/reduction_implicit_map.cpp
    clang/test/OpenMP/target_codegen.cpp
    clang/test/OpenMP/target_codegen_global_capture.cpp
    clang/test/OpenMP/target_data_member_codegen.cpp
    clang/test/OpenMP/target_data_use_device_ptr_if_codegen.cpp
    clang/test/OpenMP/target_defaultmap_codegen_01.cpp
    clang/test/OpenMP/target_defaultmap_codegen_02.cpp
    clang/test/OpenMP/target_depend_codegen.cpp
    clang/test/OpenMP/target_device_codegen.cpp
    clang/test/OpenMP/target_firstprivate_codegen.cpp
    clang/test/OpenMP/target_is_device_ptr_codegen.cpp
    clang/test/OpenMP/target_map_codegen_00.cpp
    clang/test/OpenMP/target_map_codegen_01.cpp
    clang/test/OpenMP/target_map_codegen_02.cpp
    clang/test/OpenMP/target_map_codegen_03.cpp
    clang/test/OpenMP/target_map_codegen_04.cpp
    clang/test/OpenMP/target_map_codegen_05.cpp
    clang/test/OpenMP/target_map_codegen_06.cpp
    clang/test/OpenMP/target_map_codegen_07.cpp
    clang/test/OpenMP/target_map_codegen_08.cpp
    clang/test/OpenMP/target_map_codegen_09.cpp
    clang/test/OpenMP/target_map_codegen_10.cpp
    clang/test/OpenMP/target_map_codegen_11.cpp
    clang/test/OpenMP/target_map_codegen_12.cpp
    clang/test/OpenMP/target_map_codegen_13.cpp
    clang/test/OpenMP/target_map_codegen_14.cpp
    clang/test/OpenMP/target_map_codegen_15.cpp
    clang/test/OpenMP/target_map_codegen_16.cpp
    clang/test/OpenMP/target_map_codegen_17.cpp
    clang/test/OpenMP/target_map_codegen_18.inc
    clang/test/OpenMP/target_map_codegen_19.cpp
    clang/test/OpenMP/target_map_codegen_20.cpp
    clang/test/OpenMP/target_map_codegen_21.cpp
    clang/test/OpenMP/target_map_codegen_22.cpp
    clang/test/OpenMP/target_map_codegen_23.cpp
    clang/test/OpenMP/target_map_codegen_24.cpp
    clang/test/OpenMP/target_map_codegen_25.cpp
    clang/test/OpenMP/target_map_codegen_26.cpp
    clang/test/OpenMP/target_map_codegen_27.cpp
    clang/test/OpenMP/target_map_codegen_28.cpp
    clang/test/OpenMP/target_map_codegen_29.cpp
    clang/test/OpenMP/target_map_codegen_30.cpp
    clang/test/OpenMP/target_map_codegen_31.cpp
    clang/test/OpenMP/target_map_codegen_32.cpp
    clang/test/OpenMP/target_map_codegen_33.cpp
    clang/test/OpenMP/target_map_codegen_34.cpp
    clang/test/OpenMP/target_map_codegen_35.cpp
    clang/test/OpenMP/target_map_codegen_hold.cpp
    clang/test/OpenMP/target_map_names.cpp
    clang/test/OpenMP/target_map_names_attr.cpp
    clang/test/OpenMP/target_offload_mandatory_codegen.cpp
    clang/test/OpenMP/target_parallel_codegen.cpp
    clang/test/OpenMP/target_parallel_depend_codegen.cpp
    clang/test/OpenMP/target_parallel_for_codegen.cpp
    clang/test/OpenMP/target_parallel_for_depend_codegen.cpp
    clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/target_parallel_for_simd_depend_codegen.cpp
    clang/test/OpenMP/target_parallel_for_simd_uses_allocators_codegen.cpp
    clang/test/OpenMP/target_parallel_for_uses_allocators_codegen.cpp
    clang/test/OpenMP/target_parallel_if_codegen.cpp
    clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
    clang/test/OpenMP/target_parallel_uses_allocators_codegen.cpp
    clang/test/OpenMP/target_simd_codegen.cpp
    clang/test/OpenMP/target_simd_depend_codegen.cpp
    clang/test/OpenMP/target_simd_uses_allocators_codegen.cpp
    clang/test/OpenMP/target_teams_codegen.cpp
    clang/test/OpenMP/target_teams_depend_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_depend_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_depend_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_uses_allocators_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_uses_allocators_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_private_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_uses_allocators_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_uses_allocators_codegen.cpp
    clang/test/OpenMP/target_teams_map_codegen.cpp
    clang/test/OpenMP/target_teams_num_teams_codegen.cpp
    clang/test/OpenMP/target_teams_thread_limit_codegen.cpp
    clang/test/OpenMP/target_teams_uses_allocators_codegen.cpp
    clang/test/OpenMP/target_uses_allocators_codegen.cpp
    clang/test/OpenMP/teams_codegen.cpp
    clang/test/OpenMP/teams_distribute_codegen.cpp
    clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_private_codegen.cpp
    clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
    clang/test/OpenMP/teams_firstprivate_codegen.cpp
    clang/test/OpenMP/teams_private_codegen.cpp
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 305040b01c088..e412e3930b228 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6717,11 +6717,9 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
     default:
       break;
     }
-  } else if (DefaultNT == -1) {
-    return nullptr;
   }
 
-  return Bld.getInt32(DefaultNT);
+  return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
 }
 
 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
@@ -10311,23 +10309,29 @@ void CGOpenMPRuntime::emitTargetCall(
     // Emit tripcount for the target loop-based directive.
     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
 
-    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
+    // Arguments for the target kernel.
+    SmallVector<llvm::Value *> KernelArgs{
+        CGF.Builder.getInt32(/* Version */ 1),
+        PointerNum,
+        InputInfo.BasePointersArray.getPointer(),
+        InputInfo.PointersArray.getPointer(),
+        InputInfo.SizesArray.getPointer(),
+        MapTypesArray,
+        MapNamesArray,
+        InputInfo.MappersArray.getPointer()};
+
+    // Arguments passed to the 'nowait' variant.
+    SmallVector<llvm::Value *> NoWaitKernelArgs{
+        CGF.Builder.getInt32(0),
+        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+        CGF.Builder.getInt32(0),
+        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+    };
+
+    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
+
     // The target region is an outlined function launched by the runtime
-    // via calls __tgt_target() or __tgt_target_teams().
-    //
-    // __tgt_target() launches a target region with one team and one thread,
-    // executing a serial region.  This master thread may in turn launch
-    // more threads within its team upon encountering a parallel region,
-    // however, no additional teams can be launched on the device.
-    //
-    // __tgt_target_teams() launches a target region with one or more teams,
-    // each with one or more threads.  This call is required for target
-    // constructs such as:
-    //  'target teams'
-    //  'target' / 'teams'
-    //  'target teams distribute parallel for'
-    //  'target parallel'
-    // and so on.
+    // via calls to __tgt_target_kernel().
     //
     // Note that on the host and CPU targets, the runtime implementation of
     // these calls simply call the outlined function without forking threads.
@@ -10338,70 +10342,15 @@ void CGOpenMPRuntime::emitTargetCall(
     // In contrast, on the NVPTX target, the implementation of
     // __tgt_target_teams() launches a GPU kernel with the requested number
     // of teams and threads so no additional calls to the runtime are required.
-    if (NumTeams) {
-      // If we have NumTeams defined this means that we have an enclosed teams
-      // region. Therefore we also expect to have NumThreads defined. These two
-      // values should be defined in the presence of a teams directive,
-      // regardless of having any clauses associated. If the user is using teams
-      // but no clauses, these two values will be the default that should be
-      // passed to the runtime library - a 32-bit integer with the value zero.
-      assert(NumThreads && "Thread limit expression should be available along "
-                           "with number of teams.");
-      SmallVector<llvm::Value *> OffloadingArgs = {
-          RTLoc,
-          DeviceID,
-          OutlinedFnID,
-          PointerNum,
-          InputInfo.BasePointersArray.getPointer(),
-          InputInfo.PointersArray.getPointer(),
-          InputInfo.SizesArray.getPointer(),
-          MapTypesArray,
-          MapNamesArray,
-          InputInfo.MappersArray.getPointer(),
-          NumTeams,
-          NumThreads};
-      if (HasNowait) {
-        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
-        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
-        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
-        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
-        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
-        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
-      }
-      Return = CGF.EmitRuntimeCall(
-          OMPBuilder.getOrCreateRuntimeFunction(
-              CGM.getModule(), HasNowait
-                                   ? OMPRTL___tgt_target_teams_nowait_mapper
-                                   : OMPRTL___tgt_target_teams_mapper),
-          OffloadingArgs);
-    } else {
-      SmallVector<llvm::Value *> OffloadingArgs = {
-          RTLoc,
-          DeviceID,
-          OutlinedFnID,
-          PointerNum,
-          InputInfo.BasePointersArray.getPointer(),
-          InputInfo.PointersArray.getPointer(),
-          InputInfo.SizesArray.getPointer(),
-          MapTypesArray,
-          MapNamesArray,
-          InputInfo.MappersArray.getPointer()};
-      if (HasNowait) {
-        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
-        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
-        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
-        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
-        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
-        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
-      }
-      Return = CGF.EmitRuntimeCall(
-          OMPBuilder.getOrCreateRuntimeFunction(
-              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
-                                         : OMPRTL___tgt_target_mapper),
-          OffloadingArgs);
-    }
-
     // Check the error code and execute the host version if required.
+    CGF.Builder.restoreIP(
+        HasNoWait ? OMPBuilder.emitTargetKernel(
+                        CGF.Builder, Return, RTLoc, DeviceID, NumTeams,
+                        NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs)
+                  : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc,
+                                                DeviceID, NumTeams, NumThreads,
+                                                OutlinedFnID, KernelArgs));
+
     llvm::BasicBlock *OffloadFailedBlock =
         CGF.createBasicBlock("omp_offload.failed");
     llvm::BasicBlock *OffloadContBlock =

diff  --git a/clang/test/OpenMP/capturing_in_templates.cpp b/clang/test/OpenMP/capturing_in_templates.cpp
index 9423996a05641..f270775e0b9dc 100644
--- a/clang/test/OpenMP/capturing_in_templates.cpp
+++ b/clang/test/OpenMP/capturing_in_templates.cpp
@@ -24,7 +24,7 @@ pair<T1, T2> make_pair(T1 &&t1, T2 &&t2) {
 
 // CHECK-LABEL: @main
 int main(int argc, char **argv) {
-// CHECK: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{.+}}.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null)
+// CHECK: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @{{.+}}.region_id, %struct.__tgt_kernel_arguments* %{{.+}})
 #pragma omp target
  {
     for (int i = 0; i < 64; ++i) {

diff  --git a/clang/test/OpenMP/declare_mapper_codegen.cpp b/clang/test/OpenMP/declare_mapper_codegen.cpp
index 0c8c411c3b19e..aef2fcd7305c3 100644
--- a/clang/test/OpenMP/declare_mapper_codegen.cpp
+++ b/clang/test/OpenMP/declare_mapper_codegen.cpp
@@ -243,20 +243,27 @@ void foo(int a){
   C c;
   c.a = a;
 
-  // CK0-DAG: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null, i8** [[MPRGEP:%.+]])
-  // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
-  // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
-  // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
-  // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
-  // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-  // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
-  // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
-  // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
-  // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
-  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
-  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
-  // CK0: call void [[KERNEL_1:@.+]](%class.C* [[VAL]])
-  #pragma omp target map(mapper(id),tofrom: c)
+// CK0-DAG: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
+// CK0-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
+// CK0-DAG: store i8** [[BPGEP:%.+]], i8*** [[BPARG]]
+// CK0-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
+// CK0-DAG: store i8** [[PGEP:%.+]], i8*** [[PARG]]
+// CK0-DAG: [[MARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 7
+// CK0-DAG: store i8** [[MPRGEP:%.+]], i8*** [[MARG]]
+// CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+// CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+// CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
+// CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+// CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+// CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
+// CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
+// CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+// CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
+// CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
+// CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
+// CK0: call void [[KERNEL_1:@.+]](%class.C* [[VAL]])
+#pragma omp target map(mapper(id), tofrom \
+                       : c)
   {
     ++c.a;
   }
@@ -282,20 +289,27 @@ void foo(int a){
     ++c.a;
   }
 
-  // CK0-DAG: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[TEAMSIZES]]{{.+}}, {{.+}}[[TEAMTYPES]]{{.+}}, i8** null, i8** [[MPRGEP:%.+]], i32 0, i32 0)
-  // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
-  // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
-  // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
-  // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
-  // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-  // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
-  // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
-  // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
-  // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
-  // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
-  // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
-  // CK0: call void [[KERNEL_3:@.+]](%class.C* [[VAL]])
-  #pragma omp target teams map(mapper(id),to: c)
+// CK0-DAG: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 0, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
+// CK0-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
+// CK0-DAG: store i8** [[BPGEP:%.+]], i8*** [[BPARG]]
+// CK0-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
+// CK0-DAG: store i8** [[PGEP:%.+]], i8*** [[PARG]]
+// CK0-DAG: [[MARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 7
+// CK0-DAG: store i8** [[MPRGEP:%.+]], i8*** [[MARG]]
+// CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+// CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+// CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8**
+// CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+// CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+// CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0
+// CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C**
+// CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+// CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]]
+// CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]]
+// CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
+// CK0: call void [[KERNEL_3:@.+]](%class.C* [[VAL]])
+#pragma omp target teams map(mapper(id), to \
+                             : c)
   {
     ++c.a;
   }
@@ -493,7 +507,15 @@ void foo(int a){
 // CK0: }
 
 // CK0: define internal void [[OUTLINED:@.+]](i32 {{.*}}{{[^,]+}}, [[ANON_T]]* noalias noundef [[CTXARG:%.+]])
-// CK0-DAG: call i32 @__tgt_target_nowait_mapper(%struct.ident_t* @{{.+}}, i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZEGEP:%[0-9]+]], {{.+}}[[NWTYPES]]{{.+}}, i8** null, i8** [[MPRGEP:%.+]], i32 0, i8* null, i32 0, i8* null)
+// CK0-DAG: call i32 @__tgt_target_kernel_nowait(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]], i32 0, i8* null, i32 0, i8* null)
+// CK0-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
+// CK0-DAG: store i8** [[BPGEP:%.+]], i8*** [[BPARG]]
+// CK0-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
+// CK0-DAG: store i8** [[PGEP:%.+]], i8*** [[PARG]]
+// CK0-DAG: [[SARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CK0-DAG: store i64* [[SIZEGEP:%.+]], i64** [[SARG]]
+// CK0-DAG: [[MARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 7
+// CK0-DAG: store i8** [[MPRGEP:%.+]], i8*** [[MARG]]
 // CK0-DAG: [[BPGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
 // CK0-DAG: [[PGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
 // CK0-DAG: [[SIZEGEP]] = getelementptr inbounds [1 x i64], [1 x i64]* [[SIZEFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
@@ -525,7 +547,15 @@ void foo(int a){
 // CK0: }
 
 // CK0: define internal void [[OUTLINE_1:@.+]](i32 {{.*}}%.global_tid.{{.+}}, [[ANON_T_0]]* noalias noundef [[CTXARG:%.+]])
-// CK0-DAG: call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @{{.+}}, i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SIZEGEP:%[0-9]+]], {{.+}}[[TEAMNWTYPES]]{{.+}}, i8** null, i8** [[MPRGEP:%.+]], i32 0, i32 0, i32 0, i8* null, i32 0, i8* null)
+// CK0-DAG: call i32 @__tgt_target_kernel_nowait(%struct.ident_t* @{{.+}}, i64 -1, i32 0, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]], i32 0, i8* null, i32 0, i8* null)
+// CK0-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
+// CK0-DAG: store i8** [[BPGEP:%.+]], i8*** [[BPARG]]
+// CK0-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
+// CK0-DAG: store i8** [[PGEP:%.+]], i8*** [[PARG]]
+// CK0-DAG: [[SARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CK0-DAG: store i64* [[SIZEGEP:%.+]], i64** [[SARG]]
+// CK0-DAG: [[MARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 7
+// CK0-DAG: store i8** [[MPRGEP:%.+]], i8*** [[MARG]]
 // CK0-DAG: [[BPGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
 // CK0-DAG: [[PGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
 // CK0-DAG: [[SIZEGEP]] = getelementptr inbounds [1 x i64], [1 x i64]* [[SIZEFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0
@@ -902,29 +932,36 @@ void foo(int a){
 
   // CK3-DAG: [[BC:%.+]] = getelementptr inbounds %class.B, %class.B* [[BVAL]], i32 0, i32 0
 
-  // CK3-DAG: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i8** null, i8** [[MPRGEP:%.+]])
-  // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
-  // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
-  // CK3-DAG: [[MPRGEP]] = bitcast [2 x i8*]* [[MPR:%[^,]+]] to i8**
-  // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
-  // CK3-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-  // CK3-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 0
-  // CK3-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.B**
-  // CK3-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
-  // CK3-DAG: store %class.B* [[BVAL]], %class.B** [[CBP1]]
-  // CK3-DAG: store %class.C* [[BC]], %class.C** [[CP1]]
-  // CK3-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
-  // CK3-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
-  // CK3-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
-  // CK3-DAG: [[MPR2:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 1
-  // CK3-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [10 x %class.C]**
-  // CK3-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to %class.C**
-  // CK3-DAG: store [10 x %class.C]* [[CVAL]], [10 x %class.C]** [[CBP2]]
-  // CK3-DAG: [[CVALGEP:%.+]] = getelementptr inbounds {{.+}}[[CVAL]], i{{64|32}} 0, i{{64|32}} 0
-  // CK3-DAG: store %class.C* [[CVALGEP]], %class.C** [[CP2]]
-  // CK3-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR2]]
-  // CK3: call void [[KERNEL:@.+]](%class.B* [[BVAL]], [10 x %class.C]* [[CVAL]])
-  #pragma omp target map(mapper(id),tofrom: c[0:10], b.c)
+// CK3-DAG: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
+// CK3-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
+// CK3-DAG: store i8** [[BPGEP:%.+]], i8*** [[BPARG]]
+// CK3-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
+// CK3-DAG: store i8** [[PGEP:%.+]], i8*** [[PARG]]
+// CK3-DAG: [[MARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 7
+// CK3-DAG: store i8** [[MPRGEP:%.+]], i8*** [[MARG]]
+// CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
+// CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
+// CK3-DAG: [[MPRGEP]] = bitcast [2 x i8*]* [[MPR:%[^,]+]] to i8**
+// CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
+// CK3-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
+// CK3-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 0
+// CK3-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.B**
+// CK3-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C**
+// CK3-DAG: store %class.B* [[BVAL]], %class.B** [[CBP1]]
+// CK3-DAG: store %class.C* [[BC]], %class.C** [[CP1]]
+// CK3-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR1]]
+// CK3-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
+// CK3-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
+// CK3-DAG: [[MPR2:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i{{64|32}} 0, i{{64|32}} 1
+// CK3-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [10 x %class.C]**
+// CK3-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to %class.C**
+// CK3-DAG: store [10 x %class.C]* [[CVAL]], [10 x %class.C]** [[CBP2]]
+// CK3-DAG: [[CVALGEP:%.+]] = getelementptr inbounds {{.+}}[[CVAL]], i{{64|32}} 0, i{{64|32}} 0
+// CK3-DAG: store %class.C* [[CVALGEP]], %class.C** [[CP2]]
+// CK3-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[MPRFUNC]] to i8*), i8** [[MPR2]]
+// CK3: call void [[KERNEL:@.+]](%class.B* [[BVAL]], [10 x %class.C]* [[CVAL]])
+#pragma omp target map(mapper(id), tofrom \
+                       : c [0:10], b.c)
   for (int i = 0; i < 10; i++) {
     b.c.a += ++c[i].a;
   }

diff  --git a/clang/test/OpenMP/declare_target_link_codegen.cpp b/clang/test/OpenMP/declare_target_link_codegen.cpp
index ca4ba07c65f8e..3e6ca6a542b8b 100644
--- a/clang/test/OpenMP/declare_target_link_codegen.cpp
+++ b/clang/test/OpenMP/declare_target_link_codegen.cpp
@@ -77,9 +77,9 @@ int maini1() {
 
 // HOST: [[BP0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASEPTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // HOST: [[P0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// HOST: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP0]], i8** [[P0]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPTYPES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0), i8** null, i8** null)
+// HOST: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* %{{.+}})
 // HOST: call void @__omp_offloading_{{.*}}_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* %{{[^,]+}})
-// HOST: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @.__omp_offloading_{{.+}}_l47.region_id, i32 2, {{.+}})
+// HOST: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 0, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* %{{.+}})
 
 // HOST: define internal void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* noundef nonnull align {{[0-9]+}} dereferenceable{{.*}})
 // HOST: [[C:%.*]] = load i32, i32* @c,

diff  --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp
index 855efca9999ea..ab3a576e691c2 100644
--- a/clang/test/OpenMP/distribute_codegen.cpp
+++ b/clang/test/OpenMP/distribute_codegen.cpp
@@ -170,9 +170,26 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 4571424)
-// CHECK1-NEXT:    [[TMP26:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.region_id, i32 4, i8** [[TMP24]], i8** [[TMP25]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK1-NEXT:    br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP26]], align 4
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 4, i32* [[TMP27]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** [[TMP24]], i8*** [[TMP28]], align 8
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** [[TMP25]], i8*** [[TMP29]], align 8
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP30]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP31]], align 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP33]], align 8
+// CHECK1-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
+// CHECK1-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56(float* [[TMP0]], float* [[TMP1]], float* [[TMP2]], float* [[TMP3]]) #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -343,9 +360,26 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 4571424)
-// CHECK1-NEXT:    [[TMP26:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.region_id, i32 4, i8** [[TMP24]], i8** [[TMP25]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK1-NEXT:    br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP26]], align 4
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 4, i32* [[TMP27]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** [[TMP24]], i8*** [[TMP28]], align 8
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** [[TMP25]], i8*** [[TMP29]], align 8
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64** [[TMP30]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i64** [[TMP31]], align 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP33]], align 8
+// CHECK1-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
+// CHECK1-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68(float* [[TMP0]], float* [[TMP1]], float* [[TMP2]], float* [[TMP3]]) #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -516,9 +550,26 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 16908289)
-// CHECK1-NEXT:    [[TMP26:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.region_id, i32 4, i8** [[TMP24]], i8** [[TMP25]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.5, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK1-NEXT:    br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP26]], align 4
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 4, i32* [[TMP27]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** [[TMP24]], i8*** [[TMP28]], align 8
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** [[TMP25]], i8*** [[TMP29]], align 8
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.5, i32 0, i32 0), i64** [[TMP30]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP31]], align 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP33]], align 8
+// CHECK1-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
+// CHECK1-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80(float* [[TMP0]], float* [[TMP1]], float* [[TMP2]], float* [[TMP3]]) #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -692,9 +743,26 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1
 // CHECK1-NEXT:    [[TMP12:%.*]] = zext i32 [[ADD5]] to i64
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP12]])
-// CHECK1-NEXT:    [[TMP13:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-// CHECK1-NEXT:    br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP13]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 1, i32* [[TMP14]], align 4
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** [[TMP7]], i8*** [[TMP15]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** [[TMP8]], i8*** [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64** [[TMP17]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i64** [[TMP18]], align 8
+// CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP19]], align 8
+// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP20]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+// CHECK1-NEXT:    br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92(i64 [[TMP1]]) #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -837,9 +905,26 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
-// CHECK1-NEXT:    br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP9]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** [[TMP7]], i8*** [[TMP11]], align 8
+// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** [[TMP8]], i8*** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64** [[TMP13]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i64** [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP15]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK1-NEXT:    br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108(i64 [[TMP1]]) #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1000,9 +1085,26 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 4571424)
-// CHECK3-NEXT:    [[TMP26:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.region_id, i32 4, i8** [[TMP24]], i8** [[TMP25]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK3-NEXT:    br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 1, i32* [[TMP26]], align 4
+// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT:    store i32 4, i32* [[TMP27]], align 4
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8** [[TMP24]], i8*** [[TMP28]], align 4
+// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8** [[TMP25]], i8*** [[TMP29]], align 4
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP30]], align 4
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP31]], align 4
+// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT:    store i8** null, i8*** [[TMP32]], align 4
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT:    store i8** null, i8*** [[TMP33]], align 4
+// CHECK3-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
+// CHECK3-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56(float* [[TMP0]], float* [[TMP1]], float* [[TMP2]], float* [[TMP3]]) #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1169,9 +1271,26 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 4571424)
-// CHECK3-NEXT:    [[TMP26:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.region_id, i32 4, i8** [[TMP24]], i8** [[TMP25]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK3-NEXT:    br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 1, i32* [[TMP26]], align 4
+// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT:    store i32 4, i32* [[TMP27]], align 4
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8** [[TMP24]], i8*** [[TMP28]], align 4
+// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8** [[TMP25]], i8*** [[TMP29]], align 4
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64** [[TMP30]], align 4
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i64** [[TMP31]], align 4
+// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT:    store i8** null, i8*** [[TMP32]], align 4
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT:    store i8** null, i8*** [[TMP33]], align 4
+// CHECK3-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
+// CHECK3-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68(float* [[TMP0]], float* [[TMP1]], float* [[TMP2]], float* [[TMP3]]) #[[ATTR2]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1338,9 +1457,26 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 16908289)
-// CHECK3-NEXT:    [[TMP26:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.region_id, i32 4, i8** [[TMP24]], i8** [[TMP25]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.5, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
-// CHECK3-NEXT:    br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 1, i32* [[TMP26]], align 4
+// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT:    store i32 4, i32* [[TMP27]], align 4
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8** [[TMP24]], i8*** [[TMP28]], align 4
+// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8** [[TMP25]], i8*** [[TMP29]], align 4
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.5, i32 0, i32 0), i64** [[TMP30]], align 4
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP31]], align 4
+// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT:    store i8** null, i8*** [[TMP32]], align 4
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT:    store i8** null, i8*** [[TMP33]], align 4
+// CHECK3-NEXT:    [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-NEXT:    [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
+// CHECK3-NEXT:    br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80(float* [[TMP0]], float* [[TMP1]], float* [[TMP2]], float* [[TMP3]]) #[[ATTR2]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1510,9 +1646,26 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1
 // CHECK3-NEXT:    [[TMP12:%.*]] = zext i32 [[ADD5]] to i64
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP12]])
-// CHECK3-NEXT:    [[TMP13:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-// CHECK3-NEXT:    br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 1, i32* [[TMP13]], align 4
+// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT:    store i32 1, i32* [[TMP14]], align 4
+// CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8** [[TMP7]], i8*** [[TMP15]], align 4
+// CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8** [[TMP8]], i8*** [[TMP16]], align 4
+// CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64** [[TMP17]], align 4
+// CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i64** [[TMP18]], align 4
+// CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT:    store i8** null, i8*** [[TMP19]], align 4
+// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT:    store i8** null, i8*** [[TMP20]], align 4
+// CHECK3-NEXT:    [[TMP21:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+// CHECK3-NEXT:    br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92(i32 [[TMP1]]) #[[ATTR2]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1655,9 +1808,26 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 100)
-// CHECK3-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
-// CHECK3-NEXT:    br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 1, i32* [[TMP9]], align 4
+// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8** [[TMP7]], i8*** [[TMP11]], align 4
+// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8** [[TMP8]], i8*** [[TMP12]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64** [[TMP13]], align 4
+// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i64** [[TMP14]], align 4
+// CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT:    store i8** null, i8*** [[TMP15]], align 4
+// CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT:    store i8** null, i8*** [[TMP16]], align 4
+// CHECK3-NEXT:    [[TMP17:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK3-NEXT:    br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108(i32 [[TMP1]]) #[[ATTR2]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]

diff  --git a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
index 4abfc4fbc45ea..79e1e0e46fde5 100644
--- a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
@@ -573,9 +573,26 @@ int main() {
 // CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 2)
-// CHECK9-NEXT:    [[TMP36:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
-// CHECK9-NEXT:    br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP36]], align 4
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP37]], align 4
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP34]], i8*** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP35]], i8*** [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP41]], align 8
+// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP43]], align 8
+// CHECK9-NEXT:    [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK9-NEXT:    br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP4]], i64 [[TMP6]]) #[[ATTR4:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -583,18 +600,18 @@ int main() {
 // CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
 // CHECK9-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK9:       arraydestroy.body:
-// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK9-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK9-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK9-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK9:       arraydestroy.done3:
 // CHECK9-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK9-NEXT:    [[TMP39:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP39]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP47]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -864,27 +881,44 @@ int main() {
 // CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 2)
-// CHECK9-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK9-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP30]], align 4
+// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 8
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64** [[TMP33]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i64** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP35]], align 8
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK9-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
 // CHECK9-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
 // CHECK9-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK9:       arraydestroy.body:
-// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK9-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK9-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK9-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK9:       arraydestroy.done2:
 // CHECK9-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP32]]
+// CHECK9-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP40]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
@@ -1235,9 +1269,26 @@ int main() {
 // CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 2)
-// CHECK11-NEXT:    [[TMP36:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
-// CHECK11-NEXT:    br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP34]], i8*** [[TMP38]], align 4
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP35]], i8*** [[TMP39]], align 4
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP42]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK11-NEXT:    br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP4]], i32 [[TMP6]]) #[[ATTR4:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1245,18 +1296,18 @@ int main() {
 // CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK11-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
 // CHECK11-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK11:       arraydestroy.body:
-// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK11-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK11-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK11-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK11:       arraydestroy.done2:
 // CHECK11-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK11-NEXT:    [[TMP39:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP39]]
+// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP47]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -1521,27 +1572,44 @@ int main() {
 // CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 2)
-// CHECK11-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK11-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP30]], align 4
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i64** [[TMP34]], align 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK11-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
 // CHECK11-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK11-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
 // CHECK11-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK11:       arraydestroy.body:
-// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK11-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK11-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK11-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK11:       arraydestroy.done2:
 // CHECK11-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP32]]
+// CHECK11-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP40]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev

diff  --git a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
index 7420e1b44ec86..ffa4b4228e9ff 100644
--- a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
@@ -558,9 +558,26 @@ int main() {
 // CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 2)
-// CHECK9-NEXT:    [[TMP36:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
-// CHECK9-NEXT:    br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP36]], align 4
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP37]], align 4
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP34]], i8*** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP35]], i8*** [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP41]], align 8
+// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP43]], align 8
+// CHECK9-NEXT:    [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK9-NEXT:    br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP4]], i64 [[TMP6]]) #[[ATTR4:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -568,18 +585,18 @@ int main() {
 // CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
 // CHECK9-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK9:       arraydestroy.body:
-// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK9-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK9-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK9-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK9:       arraydestroy.done3:
 // CHECK9-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK9-NEXT:    [[TMP39:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP39]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP47]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -867,27 +884,44 @@ int main() {
 // CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 2)
-// CHECK9-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK9-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP30]], align 4
+// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 8
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64** [[TMP33]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i64** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP35]], align 8
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK9-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
 // CHECK9-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
 // CHECK9-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK9:       arraydestroy.body:
-// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK9-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK9-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK9-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK9:       arraydestroy.done2:
 // CHECK9-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP32]]
+// CHECK9-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP40]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
@@ -1257,9 +1291,26 @@ int main() {
 // CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 2)
-// CHECK11-NEXT:    [[TMP36:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
-// CHECK11-NEXT:    br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP34]], i8*** [[TMP38]], align 4
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP35]], i8*** [[TMP39]], align 4
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP42]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK11-NEXT:    br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP4]], i32 [[TMP6]]) #[[ATTR4:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1267,18 +1318,18 @@ int main() {
 // CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK11-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
 // CHECK11-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK11:       arraydestroy.body:
-// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK11-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK11-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK11-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK11:       arraydestroy.done2:
 // CHECK11-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK11-NEXT:    [[TMP39:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP39]]
+// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP47]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -1561,27 +1612,44 @@ int main() {
 // CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 2)
-// CHECK11-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK11-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP30]], align 4
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.2, i32 0, i32 0), i64** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.3, i32 0, i32 0), i64** [[TMP34]], align 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB2]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK11-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
 // CHECK11-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK11-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
 // CHECK11-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK11:       arraydestroy.body:
-// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK11-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK11-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK11-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK11:       arraydestroy.done2:
 // CHECK11-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP32]]
+// CHECK11-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP40]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev

diff  --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
index 5402da88c058f..85612f4631fab 100644
--- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -4310,43 +4310,43 @@ int main() {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[CH_CASTED:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS21:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS21:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS22:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS23:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[N_CASTED32:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS36:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP37:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_38:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[CH_CASTED46:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED48:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS51:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS52:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP53:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_54:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[N_CASTED62:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS64:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS65:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS66:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP67:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_68:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_69:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[CH_CASTED76:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED78:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS80:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS81:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS82:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP83:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_84:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_85:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED34:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS36:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS37:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS38:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[CH_CASTED49:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[N_CASTED51:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS53:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS54:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS55:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED66:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS68:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS69:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS70:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP71:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_72:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_73:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[CH_CASTED81:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[N_CASTED83:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS85:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS86:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS87:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP88:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_89:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_90:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 10000, i32* [[N]], align 4
 // CHECK9-NEXT:    store i32 100, i32* [[CH]], align 4
@@ -4402,403 +4402,522 @@ int main() {
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
 // CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l369.region_id, i32 4, i8** [[TMP25]], i8** [[TMP26]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP31]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP32]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP25]], i8*** [[TMP33]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP26]], i8*** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP35]], align 8
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l369.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
+// CHECK9-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l369(i64 [[TMP1]], double* [[TMP2]], double* [[TMP3]], double* [[TMP4]]) #[[ATTR2:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP37:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP42]], align 8
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to double**
-// CHECK9-NEXT:    store double* [[TMP35]], double** [[TMP44]], align 8
-// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to double**
-// CHECK9-NEXT:    store double* [[TMP35]], double** [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to double**
-// CHECK9-NEXT:    store double* [[TMP36]], double** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to double**
-// CHECK9-NEXT:    store double* [[TMP36]], double** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 3
+// CHECK9-NEXT:    store i32 [[TMP41]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP44:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP42]], i64* [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP42]], i64* [[TMP49]], align 8
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to double**
+// CHECK9-NEXT:    store double* [[TMP43]], double** [[TMP52]], align 8
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to double**
-// CHECK9-NEXT:    store double* [[TMP37]], double** [[TMP54]], align 8
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP56:%.*]] = bitcast i8** [[TMP55]] to double**
-// CHECK9-NEXT:    store double* [[TMP37]], double** [[TMP56]], align 8
-// CHECK9-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP57]], align 8
-// CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP60:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP60]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK9-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP61]], 0
+// CHECK9-NEXT:    store double* [[TMP43]], double** [[TMP54]], align 8
+// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP55]], align 8
+// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to double**
+// CHECK9-NEXT:    store double* [[TMP44]], double** [[TMP57]], align 8
+// CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to double**
+// CHECK9-NEXT:    store double* [[TMP44]], double** [[TMP59]], align 8
+// CHECK9-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP60]], align 8
+// CHECK9-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to double**
+// CHECK9-NEXT:    store double* [[TMP45]], double** [[TMP62]], align 8
+// CHECK9-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to double**
+// CHECK9-NEXT:    store double* [[TMP45]], double** [[TMP64]], align 8
+// CHECK9-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP65]], align 8
+// CHECK9-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP68]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK9-NEXT:    [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK9-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP69]], 0
 // CHECK9-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK9-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK9-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1
-// CHECK9-NEXT:    [[TMP63:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP63]])
-// CHECK9-NEXT:    [[TMP64:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l408.region_id, i32 4, i8** [[TMP58]], i8** [[TMP59]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP65:%.*]] = icmp ne i32 [[TMP64]], 0
-// CHECK9-NEXT:    br i1 [[TMP65]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
-// CHECK9:       omp_offload.failed15:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l408(i64 [[TMP34]], double* [[TMP35]], double* [[TMP36]], double* [[TMP37]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
-// CHECK9:       omp_offload.cont16:
-// CHECK9-NEXT:    [[TMP66:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV17:%.*]] = bitcast i64* [[CH_CASTED]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP66]], i32* [[CONV17]], align 4
-// CHECK9-NEXT:    [[TMP67:%.*]] = load i64, i64* [[CH_CASTED]], align 8
-// CHECK9-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV19:%.*]] = bitcast i64* [[N_CASTED18]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP68]], i32* [[CONV19]], align 4
-// CHECK9-NEXT:    [[TMP69:%.*]] = load i64, i64* [[N_CASTED18]], align 8
-// CHECK9-NEXT:    [[TMP70:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP71:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP72:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP74]], align 8
-// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP76]], align 8
-// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP77]], align 8
-// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP69]], i64* [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP69]], i64* [[TMP81]], align 8
-// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP82]], align 8
-// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to double**
-// CHECK9-NEXT:    store double* [[TMP70]], double** [[TMP84]], align 8
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to double**
-// CHECK9-NEXT:    store double* [[TMP70]], double** [[TMP86]], align 8
-// CHECK9-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP87]], align 8
-// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
-// CHECK9-NEXT:    store double* [[TMP71]], double** [[TMP89]], align 8
-// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to double**
-// CHECK9-NEXT:    store double* [[TMP71]], double** [[TMP91]], align 8
-// CHECK9-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK9-NEXT:    store double* [[TMP72]], double** [[TMP94]], align 8
-// CHECK9-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to double**
-// CHECK9-NEXT:    store double* [[TMP72]], double** [[TMP96]], align 8
-// CHECK9-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP97]], align 8
-// CHECK9-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP100]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK9-NEXT:    [[TMP101:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK9-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP101]], 0
-// CHECK9-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
-// CHECK9-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
-// CHECK9-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[TMP102:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP102]], 1
-// CHECK9-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP103]])
-// CHECK9-NEXT:    [[TMP104:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l447.region_id, i32 5, i8** [[TMP98]], i8** [[TMP99]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP105:%.*]] = icmp ne i32 [[TMP104]], 0
-// CHECK9-NEXT:    br i1 [[TMP105]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
-// CHECK9:       omp_offload.failed30:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l447(i64 [[TMP67]], i64 [[TMP69]], double* [[TMP70]], double* [[TMP71]], double* [[TMP72]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
-// CHECK9:       omp_offload.cont31:
-// CHECK9-NEXT:    [[TMP106:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV33:%.*]] = bitcast i64* [[N_CASTED32]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP106]], i32* [[CONV33]], align 4
-// CHECK9-NEXT:    [[TMP107:%.*]] = load i64, i64* [[N_CASTED32]], align 8
-// CHECK9-NEXT:    [[TMP108:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP109:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP110:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP107]], i64* [[TMP112]], align 8
-// CHECK9-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP107]], i64* [[TMP114]], align 8
-// CHECK9-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK9-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to double**
-// CHECK9-NEXT:    store double* [[TMP108]], double** [[TMP117]], align 8
-// CHECK9-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to double**
-// CHECK9-NEXT:    store double* [[TMP108]], double** [[TMP119]], align 8
-// CHECK9-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP120]], align 8
-// CHECK9-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to double**
-// CHECK9-NEXT:    store double* [[TMP109]], double** [[TMP122]], align 8
-// CHECK9-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to double**
-// CHECK9-NEXT:    store double* [[TMP109]], double** [[TMP124]], align 8
-// CHECK9-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP125]], align 8
-// CHECK9-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
-// CHECK9-NEXT:    store double* [[TMP110]], double** [[TMP127]], align 8
-// CHECK9-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to double**
-// CHECK9-NEXT:    store double* [[TMP110]], double** [[TMP129]], align 8
-// CHECK9-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK9-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK9-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK9-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP134]], 0
-// CHECK9-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
-// CHECK9-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
-// CHECK9-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK9-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK9-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP135]], 1
-// CHECK9-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP136]])
-// CHECK9-NEXT:    [[TMP137:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l478.region_id, i32 4, i8** [[TMP131]], i8** [[TMP132]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP138:%.*]] = icmp ne i32 [[TMP137]], 0
-// CHECK9-NEXT:    br i1 [[TMP138]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
-// CHECK9:       omp_offload.failed44:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l478(i64 [[TMP107]], double* [[TMP108]], double* [[TMP109]], double* [[TMP110]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
-// CHECK9:       omp_offload.cont45:
-// CHECK9-NEXT:    [[TMP139:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV47:%.*]] = bitcast i64* [[CH_CASTED46]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP139]], i32* [[CONV47]], align 4
-// CHECK9-NEXT:    [[TMP140:%.*]] = load i64, i64* [[CH_CASTED46]], align 8
-// CHECK9-NEXT:    [[TMP141:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV49:%.*]] = bitcast i64* [[N_CASTED48]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP141]], i32* [[CONV49]], align 4
-// CHECK9-NEXT:    [[TMP142:%.*]] = load i64, i64* [[N_CASTED48]], align 8
-// CHECK9-NEXT:    [[TMP143:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP144:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP145:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP140]], i64* [[TMP147]], align 8
-// CHECK9-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP140]], i64* [[TMP149]], align 8
-// CHECK9-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP150]], align 8
-// CHECK9-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP142]], i64* [[TMP152]], align 8
-// CHECK9-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP142]], i64* [[TMP154]], align 8
-// CHECK9-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK9-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to double**
-// CHECK9-NEXT:    store double* [[TMP143]], double** [[TMP157]], align 8
-// CHECK9-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to double**
-// CHECK9-NEXT:    store double* [[TMP143]], double** [[TMP159]], align 8
-// CHECK9-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP160]], align 8
-// CHECK9-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to double**
-// CHECK9-NEXT:    store double* [[TMP144]], double** [[TMP162]], align 8
-// CHECK9-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to double**
-// CHECK9-NEXT:    store double* [[TMP144]], double** [[TMP164]], align 8
-// CHECK9-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP165]], align 8
-// CHECK9-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP167:%.*]] = bitcast i8** [[TMP166]] to double**
-// CHECK9-NEXT:    store double* [[TMP145]], double** [[TMP167]], align 8
-// CHECK9-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP169:%.*]] = bitcast i8** [[TMP168]] to double**
-// CHECK9-NEXT:    store double* [[TMP145]], double** [[TMP169]], align 8
-// CHECK9-NEXT:    [[TMP170:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP170]], align 8
-// CHECK9-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP70]], 1
+// CHECK9-NEXT:    [[TMP71:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP71]])
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP72]], align 4
+// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP73]], align 4
+// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP66]], i8*** [[TMP74]], align 8
+// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP67]], i8*** [[TMP75]], align 8
+// CHECK9-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64** [[TMP76]], align 8
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i64** [[TMP77]], align 8
+// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP78]], align 8
+// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP79]], align 8
+// CHECK9-NEXT:    [[TMP80:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l408.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]])
+// CHECK9-NEXT:    [[TMP81:%.*]] = icmp ne i32 [[TMP80]], 0
+// CHECK9-NEXT:    br i1 [[TMP81]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9:       omp_offload.failed16:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l408(i64 [[TMP42]], double* [[TMP43]], double* [[TMP44]], double* [[TMP45]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
+// CHECK9:       omp_offload.cont17:
+// CHECK9-NEXT:    [[TMP82:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV18:%.*]] = bitcast i64* [[CH_CASTED]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP82]], i32* [[CONV18]], align 4
+// CHECK9-NEXT:    [[TMP83:%.*]] = load i64, i64* [[CH_CASTED]], align 8
+// CHECK9-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP84]], i32* [[CONV20]], align 4
+// CHECK9-NEXT:    [[TMP85:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK9-NEXT:    [[TMP86:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP87:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP88:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP83]], i64* [[TMP90]], align 8
+// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP83]], i64* [[TMP92]], align 8
+// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP93]], align 8
+// CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP85]], i64* [[TMP95]], align 8
+// CHECK9-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP85]], i64* [[TMP97]], align 8
+// CHECK9-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP98]], align 8
+// CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to double**
+// CHECK9-NEXT:    store double* [[TMP86]], double** [[TMP100]], align 8
+// CHECK9-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to double**
+// CHECK9-NEXT:    store double* [[TMP86]], double** [[TMP102]], align 8
+// CHECK9-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP103]], align 8
+// CHECK9-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to double**
+// CHECK9-NEXT:    store double* [[TMP87]], double** [[TMP105]], align 8
+// CHECK9-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to double**
+// CHECK9-NEXT:    store double* [[TMP87]], double** [[TMP107]], align 8
+// CHECK9-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP108]], align 8
+// CHECK9-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to double**
+// CHECK9-NEXT:    store double* [[TMP88]], double** [[TMP110]], align 8
+// CHECK9-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to double**
+// CHECK9-NEXT:    store double* [[TMP88]], double** [[TMP112]], align 8
+// CHECK9-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP113]], align 8
+// CHECK9-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP116:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP116]], i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[TMP117:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP117]], 0
+// CHECK9-NEXT:    [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[DIV28]], 1
+// CHECK9-NEXT:    store i32 [[SUB29]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[TMP118:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP118]], 1
+// CHECK9-NEXT:    [[TMP119:%.*]] = zext i32 [[ADD30]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP119]])
+// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP120]], align 4
+// CHECK9-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP121]], align 4
+// CHECK9-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP114]], i8*** [[TMP122]], align 8
+// CHECK9-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP115]], i8*** [[TMP123]], align 8
+// CHECK9-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.8, i32 0, i32 0), i64** [[TMP124]], align 8
+// CHECK9-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.9, i32 0, i32 0), i64** [[TMP125]], align 8
+// CHECK9-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP126]], align 8
+// CHECK9-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP127]], align 8
+// CHECK9-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l447.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]])
+// CHECK9-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK9-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK9:       omp_offload.failed32:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l447(i64 [[TMP83]], i64 [[TMP85]], double* [[TMP86]], double* [[TMP87]], double* [[TMP88]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
+// CHECK9:       omp_offload.cont33:
+// CHECK9-NEXT:    [[TMP130:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP130]], i32* [[CONV35]], align 4
+// CHECK9-NEXT:    [[TMP131:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK9-NEXT:    [[TMP132:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP133:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP134:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP131]], i64* [[TMP136]], align 8
+// CHECK9-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP131]], i64* [[TMP138]], align 8
+// CHECK9-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP139]], align 8
+// CHECK9-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
+// CHECK9-NEXT:    store double* [[TMP132]], double** [[TMP141]], align 8
+// CHECK9-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
+// CHECK9-NEXT:    store double* [[TMP132]], double** [[TMP143]], align 8
+// CHECK9-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP144]], align 8
+// CHECK9-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to double**
+// CHECK9-NEXT:    store double* [[TMP133]], double** [[TMP146]], align 8
+// CHECK9-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to double**
+// CHECK9-NEXT:    store double* [[TMP133]], double** [[TMP148]], align 8
+// CHECK9-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP149]], align 8
+// CHECK9-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to double**
+// CHECK9-NEXT:    store double* [[TMP134]], double** [[TMP151]], align 8
+// CHECK9-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to double**
+// CHECK9-NEXT:    store double* [[TMP134]], double** [[TMP153]], align 8
+// CHECK9-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK9-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP157:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP157]], i32* [[DOTCAPTURE_EXPR_40]], align 4
+// CHECK9-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_40]], align 4
+// CHECK9-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[TMP158]], 0
+// CHECK9-NEXT:    [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1
+// CHECK9-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[DIV43]], 1
+// CHECK9-NEXT:    store i32 [[SUB44]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK9-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK9-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP159]], 1
+// CHECK9-NEXT:    [[TMP160:%.*]] = zext i32 [[ADD45]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP160]])
+// CHECK9-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP161]], align 4
+// CHECK9-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP162]], align 4
+// CHECK9-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP155]], i8*** [[TMP163]], align 8
+// CHECK9-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP156]], i8*** [[TMP164]], align 8
+// CHECK9-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64** [[TMP165]], align 8
+// CHECK9-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i64** [[TMP166]], align 8
+// CHECK9-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP167]], align 8
+// CHECK9-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP168]], align 8
+// CHECK9-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l478.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]])
+// CHECK9-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
+// CHECK9-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK9:       omp_offload.failed47:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l478(i64 [[TMP131]], double* [[TMP132]], double* [[TMP133]], double* [[TMP134]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
+// CHECK9:       omp_offload.cont48:
+// CHECK9-NEXT:    [[TMP171:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV50:%.*]] = bitcast i64* [[CH_CASTED49]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP171]], i32* [[CONV50]], align 4
+// CHECK9-NEXT:    [[TMP172:%.*]] = load i64, i64* [[CH_CASTED49]], align 8
 // CHECK9-NEXT:    [[TMP173:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP173]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK9-NEXT:    [[TMP174:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK9-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP174]], 0
-// CHECK9-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
-// CHECK9-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
-// CHECK9-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK9-NEXT:    [[TMP175:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK9-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP175]], 1
-// CHECK9-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP176]])
-// CHECK9-NEXT:    [[TMP177:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l506.region_id, i32 5, i8** [[TMP171]], i8** [[TMP172]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP178:%.*]] = icmp ne i32 [[TMP177]], 0
-// CHECK9-NEXT:    br i1 [[TMP178]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
-// CHECK9:       omp_offload.failed60:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l506(i64 [[TMP140]], i64 [[TMP142]], double* [[TMP143]], double* [[TMP144]], double* [[TMP145]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
-// CHECK9:       omp_offload.cont61:
-// CHECK9-NEXT:    [[TMP179:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV63:%.*]] = bitcast i64* [[N_CASTED62]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP179]], i32* [[CONV63]], align 4
-// CHECK9-NEXT:    [[TMP180:%.*]] = load i64, i64* [[N_CASTED62]], align 8
-// CHECK9-NEXT:    [[TMP181:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP182:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP183:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP184:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP185:%.*]] = bitcast i8** [[TMP184]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP180]], i64* [[TMP185]], align 8
-// CHECK9-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP187:%.*]] = bitcast i8** [[TMP186]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP180]], i64* [[TMP187]], align 8
-// CHECK9-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP188]], align 8
-// CHECK9-NEXT:    [[TMP189:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP190:%.*]] = bitcast i8** [[TMP189]] to double**
-// CHECK9-NEXT:    store double* [[TMP181]], double** [[TMP190]], align 8
-// CHECK9-NEXT:    [[TMP191:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP192:%.*]] = bitcast i8** [[TMP191]] to double**
-// CHECK9-NEXT:    store double* [[TMP181]], double** [[TMP192]], align 8
-// CHECK9-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP193]], align 8
-// CHECK9-NEXT:    [[TMP194:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP195:%.*]] = bitcast i8** [[TMP194]] to double**
-// CHECK9-NEXT:    store double* [[TMP182]], double** [[TMP195]], align 8
-// CHECK9-NEXT:    [[TMP196:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP197:%.*]] = bitcast i8** [[TMP196]] to double**
-// CHECK9-NEXT:    store double* [[TMP182]], double** [[TMP197]], align 8
-// CHECK9-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP198]], align 8
-// CHECK9-NEXT:    [[TMP199:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP200:%.*]] = bitcast i8** [[TMP199]] to double**
-// CHECK9-NEXT:    store double* [[TMP183]], double** [[TMP200]], align 8
-// CHECK9-NEXT:    [[TMP201:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP202:%.*]] = bitcast i8** [[TMP201]] to double**
-// CHECK9-NEXT:    store double* [[TMP183]], double** [[TMP202]], align 8
-// CHECK9-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP203]], align 8
-// CHECK9-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP205:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP206:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP206]], i32* [[DOTCAPTURE_EXPR_68]], align 4
-// CHECK9-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_68]], align 4
-// CHECK9-NEXT:    [[SUB70:%.*]] = sub nsw i32 [[TMP207]], 0
-// CHECK9-NEXT:    [[DIV71:%.*]] = sdiv i32 [[SUB70]], 1
-// CHECK9-NEXT:    [[SUB72:%.*]] = sub nsw i32 [[DIV71]], 1
-// CHECK9-NEXT:    store i32 [[SUB72]], i32* [[DOTCAPTURE_EXPR_69]], align 4
-// CHECK9-NEXT:    [[TMP208:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_69]], align 4
-// CHECK9-NEXT:    [[ADD73:%.*]] = add nsw i32 [[TMP208]], 1
-// CHECK9-NEXT:    [[TMP209:%.*]] = zext i32 [[ADD73]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP209]])
-// CHECK9-NEXT:    [[TMP210:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l536.region_id, i32 4, i8** [[TMP204]], i8** [[TMP205]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP211:%.*]] = icmp ne i32 [[TMP210]], 0
-// CHECK9-NEXT:    br i1 [[TMP211]], label [[OMP_OFFLOAD_FAILED74:%.*]], label [[OMP_OFFLOAD_CONT75:%.*]]
-// CHECK9:       omp_offload.failed74:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l536(i64 [[TMP180]], double* [[TMP181]], double* [[TMP182]], double* [[TMP183]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT75]]
-// CHECK9:       omp_offload.cont75:
-// CHECK9-NEXT:    [[TMP212:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV77:%.*]] = bitcast i64* [[CH_CASTED76]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP212]], i32* [[CONV77]], align 4
-// CHECK9-NEXT:    [[TMP213:%.*]] = load i64, i64* [[CH_CASTED76]], align 8
-// CHECK9-NEXT:    [[TMP214:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV79:%.*]] = bitcast i64* [[N_CASTED78]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP214]], i32* [[CONV79]], align 4
-// CHECK9-NEXT:    [[TMP215:%.*]] = load i64, i64* [[N_CASTED78]], align 8
-// CHECK9-NEXT:    [[TMP216:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP217:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP218:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP219:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP220:%.*]] = bitcast i8** [[TMP219]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP213]], i64* [[TMP220]], align 8
-// CHECK9-NEXT:    [[TMP221:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP222:%.*]] = bitcast i8** [[TMP221]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP213]], i64* [[TMP222]], align 8
-// CHECK9-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP223]], align 8
-// CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 1
+// CHECK9-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP173]], i32* [[CONV52]], align 4
+// CHECK9-NEXT:    [[TMP174:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK9-NEXT:    [[TMP175:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP176:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP177:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP179:%.*]] = bitcast i8** [[TMP178]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP172]], i64* [[TMP179]], align 8
+// CHECK9-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP181:%.*]] = bitcast i8** [[TMP180]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP172]], i64* [[TMP181]], align 8
+// CHECK9-NEXT:    [[TMP182:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP182]], align 8
+// CHECK9-NEXT:    [[TMP183:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP184:%.*]] = bitcast i8** [[TMP183]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP174]], i64* [[TMP184]], align 8
+// CHECK9-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP186:%.*]] = bitcast i8** [[TMP185]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP174]], i64* [[TMP186]], align 8
+// CHECK9-NEXT:    [[TMP187:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP187]], align 8
+// CHECK9-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP189:%.*]] = bitcast i8** [[TMP188]] to double**
+// CHECK9-NEXT:    store double* [[TMP175]], double** [[TMP189]], align 8
+// CHECK9-NEXT:    [[TMP190:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP191:%.*]] = bitcast i8** [[TMP190]] to double**
+// CHECK9-NEXT:    store double* [[TMP175]], double** [[TMP191]], align 8
+// CHECK9-NEXT:    [[TMP192:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP192]], align 8
+// CHECK9-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP194:%.*]] = bitcast i8** [[TMP193]] to double**
+// CHECK9-NEXT:    store double* [[TMP176]], double** [[TMP194]], align 8
+// CHECK9-NEXT:    [[TMP195:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP196:%.*]] = bitcast i8** [[TMP195]] to double**
+// CHECK9-NEXT:    store double* [[TMP176]], double** [[TMP196]], align 8
+// CHECK9-NEXT:    [[TMP197:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP197]], align 8
+// CHECK9-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP199:%.*]] = bitcast i8** [[TMP198]] to double**
+// CHECK9-NEXT:    store double* [[TMP177]], double** [[TMP199]], align 8
+// CHECK9-NEXT:    [[TMP200:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP201:%.*]] = bitcast i8** [[TMP200]] to double**
+// CHECK9-NEXT:    store double* [[TMP177]], double** [[TMP201]], align 8
+// CHECK9-NEXT:    [[TMP202:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP202]], align 8
+// CHECK9-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP205:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP205]], i32* [[DOTCAPTURE_EXPR_57]], align 4
+// CHECK9-NEXT:    [[TMP206:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_57]], align 4
+// CHECK9-NEXT:    [[SUB59:%.*]] = sub nsw i32 [[TMP206]], 0
+// CHECK9-NEXT:    [[DIV60:%.*]] = sdiv i32 [[SUB59]], 1
+// CHECK9-NEXT:    [[SUB61:%.*]] = sub nsw i32 [[DIV60]], 1
+// CHECK9-NEXT:    store i32 [[SUB61]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK9-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK9-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP207]], 1
+// CHECK9-NEXT:    [[TMP208:%.*]] = zext i32 [[ADD62]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP208]])
+// CHECK9-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP209:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP209]], align 4
+// CHECK9-NEXT:    [[TMP210:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP210]], align 4
+// CHECK9-NEXT:    [[TMP211:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP203]], i8*** [[TMP211]], align 8
+// CHECK9-NEXT:    [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP204]], i8*** [[TMP212]], align 8
+// CHECK9-NEXT:    [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64** [[TMP213]], align 8
+// CHECK9-NEXT:    [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i64** [[TMP214]], align 8
+// CHECK9-NEXT:    [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP215]], align 8
+// CHECK9-NEXT:    [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP216]], align 8
+// CHECK9-NEXT:    [[TMP217:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l506.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]])
+// CHECK9-NEXT:    [[TMP218:%.*]] = icmp ne i32 [[TMP217]], 0
+// CHECK9-NEXT:    br i1 [[TMP218]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK9:       omp_offload.failed64:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l506(i64 [[TMP172]], i64 [[TMP174]], double* [[TMP175]], double* [[TMP176]], double* [[TMP177]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
+// CHECK9:       omp_offload.cont65:
+// CHECK9-NEXT:    [[TMP219:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV67:%.*]] = bitcast i64* [[N_CASTED66]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP219]], i32* [[CONV67]], align 4
+// CHECK9-NEXT:    [[TMP220:%.*]] = load i64, i64* [[N_CASTED66]], align 8
+// CHECK9-NEXT:    [[TMP221:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP222:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP223:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP225:%.*]] = bitcast i8** [[TMP224]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP215]], i64* [[TMP225]], align 8
-// CHECK9-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 1
+// CHECK9-NEXT:    store i64 [[TMP220]], i64* [[TMP225]], align 8
+// CHECK9-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP227:%.*]] = bitcast i8** [[TMP226]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP215]], i64* [[TMP227]], align 8
-// CHECK9-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 1
+// CHECK9-NEXT:    store i64 [[TMP220]], i64* [[TMP227]], align 8
+// CHECK9-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP228]], align 8
-// CHECK9-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP230:%.*]] = bitcast i8** [[TMP229]] to double**
-// CHECK9-NEXT:    store double* [[TMP216]], double** [[TMP230]], align 8
-// CHECK9-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 2
+// CHECK9-NEXT:    store double* [[TMP221]], double** [[TMP230]], align 8
+// CHECK9-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP232:%.*]] = bitcast i8** [[TMP231]] to double**
-// CHECK9-NEXT:    store double* [[TMP216]], double** [[TMP232]], align 8
-// CHECK9-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 2
+// CHECK9-NEXT:    store double* [[TMP221]], double** [[TMP232]], align 8
+// CHECK9-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 1
 // CHECK9-NEXT:    store i8* null, i8** [[TMP233]], align 8
-// CHECK9-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP235:%.*]] = bitcast i8** [[TMP234]] to double**
-// CHECK9-NEXT:    store double* [[TMP217]], double** [[TMP235]], align 8
-// CHECK9-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 3
+// CHECK9-NEXT:    store double* [[TMP222]], double** [[TMP235]], align 8
+// CHECK9-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP237:%.*]] = bitcast i8** [[TMP236]] to double**
-// CHECK9-NEXT:    store double* [[TMP217]], double** [[TMP237]], align 8
-// CHECK9-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 3
+// CHECK9-NEXT:    store double* [[TMP222]], double** [[TMP237]], align 8
+// CHECK9-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 2
 // CHECK9-NEXT:    store i8* null, i8** [[TMP238]], align 8
-// CHECK9-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP240:%.*]] = bitcast i8** [[TMP239]] to double**
-// CHECK9-NEXT:    store double* [[TMP218]], double** [[TMP240]], align 8
-// CHECK9-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 4
+// CHECK9-NEXT:    store double* [[TMP223]], double** [[TMP240]], align 8
+// CHECK9-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP242:%.*]] = bitcast i8** [[TMP241]] to double**
-// CHECK9-NEXT:    store double* [[TMP218]], double** [[TMP242]], align 8
-// CHECK9-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 4
+// CHECK9-NEXT:    store double* [[TMP223]], double** [[TMP242]], align 8
+// CHECK9-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 3
 // CHECK9-NEXT:    store i8* null, i8** [[TMP243]], align 8
-// CHECK9-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP246:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_84]], align 4
-// CHECK9-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_84]], align 4
-// CHECK9-NEXT:    [[SUB86:%.*]] = sub nsw i32 [[TMP247]], 0
-// CHECK9-NEXT:    [[DIV87:%.*]] = sdiv i32 [[SUB86]], 1
-// CHECK9-NEXT:    [[SUB88:%.*]] = sub nsw i32 [[DIV87]], 1
-// CHECK9-NEXT:    store i32 [[SUB88]], i32* [[DOTCAPTURE_EXPR_85]], align 4
-// CHECK9-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_85]], align 4
-// CHECK9-NEXT:    [[ADD89:%.*]] = add nsw i32 [[TMP248]], 1
-// CHECK9-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD89]] to i64
+// CHECK9-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_72]], align 4
+// CHECK9-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_72]], align 4
+// CHECK9-NEXT:    [[SUB74:%.*]] = sub nsw i32 [[TMP247]], 0
+// CHECK9-NEXT:    [[DIV75:%.*]] = sdiv i32 [[SUB74]], 1
+// CHECK9-NEXT:    [[SUB76:%.*]] = sub nsw i32 [[DIV75]], 1
+// CHECK9-NEXT:    store i32 [[SUB76]], i32* [[DOTCAPTURE_EXPR_73]], align 4
+// CHECK9-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_73]], align 4
+// CHECK9-NEXT:    [[ADD77:%.*]] = add nsw i32 [[TMP248]], 1
+// CHECK9-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD77]] to i64
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP249]])
-// CHECK9-NEXT:    [[TMP250:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l562.region_id, i32 5, i8** [[TMP244]], i8** [[TMP245]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP251:%.*]] = icmp ne i32 [[TMP250]], 0
-// CHECK9-NEXT:    br i1 [[TMP251]], label [[OMP_OFFLOAD_FAILED90:%.*]], label [[OMP_OFFLOAD_CONT91:%.*]]
-// CHECK9:       omp_offload.failed90:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l562(i64 [[TMP213]], i64 [[TMP215]], double* [[TMP216]], double* [[TMP217]], double* [[TMP218]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT91]]
-// CHECK9:       omp_offload.cont91:
+// CHECK9-NEXT:    [[KERNEL_ARGS78:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP250:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP250]], align 4
+// CHECK9-NEXT:    [[TMP251:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP251]], align 4
+// CHECK9-NEXT:    [[TMP252:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP244]], i8*** [[TMP252]], align 8
+// CHECK9-NEXT:    [[TMP253:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP245]], i8*** [[TMP253]], align 8
+// CHECK9-NEXT:    [[TMP254:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.20, i32 0, i32 0), i64** [[TMP254]], align 8
+// CHECK9-NEXT:    [[TMP255:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.21, i32 0, i32 0), i64** [[TMP255]], align 8
+// CHECK9-NEXT:    [[TMP256:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP256]], align 8
+// CHECK9-NEXT:    [[TMP257:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP257]], align 8
+// CHECK9-NEXT:    [[TMP258:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l536.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]])
+// CHECK9-NEXT:    [[TMP259:%.*]] = icmp ne i32 [[TMP258]], 0
+// CHECK9-NEXT:    br i1 [[TMP259]], label [[OMP_OFFLOAD_FAILED79:%.*]], label [[OMP_OFFLOAD_CONT80:%.*]]
+// CHECK9:       omp_offload.failed79:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l536(i64 [[TMP220]], double* [[TMP221]], double* [[TMP222]], double* [[TMP223]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT80]]
+// CHECK9:       omp_offload.cont80:
+// CHECK9-NEXT:    [[TMP260:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV82:%.*]] = bitcast i64* [[CH_CASTED81]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP260]], i32* [[CONV82]], align 4
+// CHECK9-NEXT:    [[TMP261:%.*]] = load i64, i64* [[CH_CASTED81]], align 8
+// CHECK9-NEXT:    [[TMP262:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV84:%.*]] = bitcast i64* [[N_CASTED83]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP262]], i32* [[CONV84]], align 4
+// CHECK9-NEXT:    [[TMP263:%.*]] = load i64, i64* [[N_CASTED83]], align 8
+// CHECK9-NEXT:    [[TMP264:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP265:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP266:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP268:%.*]] = bitcast i8** [[TMP267]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP261]], i64* [[TMP268]], align 8
+// CHECK9-NEXT:    [[TMP269:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP270:%.*]] = bitcast i8** [[TMP269]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP261]], i64* [[TMP270]], align 8
+// CHECK9-NEXT:    [[TMP271:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP271]], align 8
+// CHECK9-NEXT:    [[TMP272:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP273:%.*]] = bitcast i8** [[TMP272]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP263]], i64* [[TMP273]], align 8
+// CHECK9-NEXT:    [[TMP274:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP275:%.*]] = bitcast i8** [[TMP274]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP263]], i64* [[TMP275]], align 8
+// CHECK9-NEXT:    [[TMP276:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP276]], align 8
+// CHECK9-NEXT:    [[TMP277:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP278:%.*]] = bitcast i8** [[TMP277]] to double**
+// CHECK9-NEXT:    store double* [[TMP264]], double** [[TMP278]], align 8
+// CHECK9-NEXT:    [[TMP279:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP280:%.*]] = bitcast i8** [[TMP279]] to double**
+// CHECK9-NEXT:    store double* [[TMP264]], double** [[TMP280]], align 8
+// CHECK9-NEXT:    [[TMP281:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP281]], align 8
+// CHECK9-NEXT:    [[TMP282:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP283:%.*]] = bitcast i8** [[TMP282]] to double**
+// CHECK9-NEXT:    store double* [[TMP265]], double** [[TMP283]], align 8
+// CHECK9-NEXT:    [[TMP284:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP285:%.*]] = bitcast i8** [[TMP284]] to double**
+// CHECK9-NEXT:    store double* [[TMP265]], double** [[TMP285]], align 8
+// CHECK9-NEXT:    [[TMP286:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP286]], align 8
+// CHECK9-NEXT:    [[TMP287:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP288:%.*]] = bitcast i8** [[TMP287]] to double**
+// CHECK9-NEXT:    store double* [[TMP266]], double** [[TMP288]], align 8
+// CHECK9-NEXT:    [[TMP289:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP290:%.*]] = bitcast i8** [[TMP289]] to double**
+// CHECK9-NEXT:    store double* [[TMP266]], double** [[TMP290]], align 8
+// CHECK9-NEXT:    [[TMP291:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP291]], align 8
+// CHECK9-NEXT:    [[TMP292:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP293:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP294:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP294]], i32* [[DOTCAPTURE_EXPR_89]], align 4
+// CHECK9-NEXT:    [[TMP295:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_89]], align 4
+// CHECK9-NEXT:    [[SUB91:%.*]] = sub nsw i32 [[TMP295]], 0
+// CHECK9-NEXT:    [[DIV92:%.*]] = sdiv i32 [[SUB91]], 1
+// CHECK9-NEXT:    [[SUB93:%.*]] = sub nsw i32 [[DIV92]], 1
+// CHECK9-NEXT:    store i32 [[SUB93]], i32* [[DOTCAPTURE_EXPR_90]], align 4
+// CHECK9-NEXT:    [[TMP296:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_90]], align 4
+// CHECK9-NEXT:    [[ADD94:%.*]] = add nsw i32 [[TMP296]], 1
+// CHECK9-NEXT:    [[TMP297:%.*]] = zext i32 [[ADD94]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP297]])
+// CHECK9-NEXT:    [[KERNEL_ARGS95:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP298:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP298]], align 4
+// CHECK9-NEXT:    [[TMP299:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP299]], align 4
+// CHECK9-NEXT:    [[TMP300:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP292]], i8*** [[TMP300]], align 8
+// CHECK9-NEXT:    [[TMP301:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP293]], i8*** [[TMP301]], align 8
+// CHECK9-NEXT:    [[TMP302:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.24, i32 0, i32 0), i64** [[TMP302]], align 8
+// CHECK9-NEXT:    [[TMP303:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i64** [[TMP303]], align 8
+// CHECK9-NEXT:    [[TMP304:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP304]], align 8
+// CHECK9-NEXT:    [[TMP305:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP305]], align 8
+// CHECK9-NEXT:    [[TMP306:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l562.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]])
+// CHECK9-NEXT:    [[TMP307:%.*]] = icmp ne i32 [[TMP306]], 0
+// CHECK9-NEXT:    br i1 [[TMP307]], label [[OMP_OFFLOAD_FAILED96:%.*]], label [[OMP_OFFLOAD_CONT97:%.*]]
+// CHECK9:       omp_offload.failed96:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l562(i64 [[TMP261]], i64 [[TMP263]], double* [[TMP264]], double* [[TMP265]], double* [[TMP266]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT97]]
+// CHECK9:       omp_offload.cont97:
 // CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
 // CHECK9-NEXT:    ret i32 [[CALL]]
 //
@@ -6514,43 +6633,43 @@ int main() {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[CH_CASTED:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS21:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS21:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS22:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS23:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[N_CASTED32:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS36:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP37:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_38:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[CH_CASTED46:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED48:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS51:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS52:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP53:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_54:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[N_CASTED62:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS64:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS65:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS66:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP67:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_68:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_69:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[CH_CASTED76:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED78:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS80:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS81:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS82:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP83:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_84:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_85:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED34:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS36:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS37:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS38:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[CH_CASTED49:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[N_CASTED51:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS53:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS54:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS55:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED66:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS68:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS69:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS70:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP71:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_72:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_73:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[CH_CASTED81:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[N_CASTED83:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS85:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS86:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS87:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP88:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_89:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_90:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 10000, i32* [[N]], align 4
 // CHECK9-NEXT:    store i32 100, i32* [[CH]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N]], align 4
@@ -6605,403 +6724,522 @@ int main() {
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
 // CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42.region_id, i32 4, i8** [[TMP25]], i8** [[TMP26]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP31]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP32]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP25]], i8*** [[TMP33]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP26]], i8*** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.28, i32 0, i32 0), i64** [[TMP35]], align 8
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.29, i32 0, i32 0), i64** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
+// CHECK9-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42(i64 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]], i32* [[TMP4]]) #[[ATTR2]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP37:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP42]], align 8
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP35]], i32** [[TMP44]], align 8
-// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP35]], i32** [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP36]], i32** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP36]], i32** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 3
+// CHECK9-NEXT:    store i32 [[TMP41]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP44:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP42]], i64* [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP42]], i64* [[TMP49]], align 8
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP43]], i32** [[TMP52]], align 8
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP37]], i32** [[TMP54]], align 8
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP37]], i32** [[TMP56]], align 8
-// CHECK9-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP57]], align 8
-// CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP60:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP60]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK9-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP61]], 0
+// CHECK9-NEXT:    store i32* [[TMP43]], i32** [[TMP54]], align 8
+// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP55]], align 8
+// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP44]], i32** [[TMP57]], align 8
+// CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP44]], i32** [[TMP59]], align 8
+// CHECK9-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP60]], align 8
+// CHECK9-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP45]], i32** [[TMP62]], align 8
+// CHECK9-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP45]], i32** [[TMP64]], align 8
+// CHECK9-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP65]], align 8
+// CHECK9-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP68]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK9-NEXT:    [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK9-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP69]], 0
 // CHECK9-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK9-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK9-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1
-// CHECK9-NEXT:    [[TMP63:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP63]])
-// CHECK9-NEXT:    [[TMP64:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l51.region_id, i32 4, i8** [[TMP58]], i8** [[TMP59]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP65:%.*]] = icmp ne i32 [[TMP64]], 0
-// CHECK9-NEXT:    br i1 [[TMP65]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
-// CHECK9:       omp_offload.failed15:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l51(i64 [[TMP34]], i32* [[TMP35]], i32* [[TMP36]], i32* [[TMP37]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
-// CHECK9:       omp_offload.cont16:
-// CHECK9-NEXT:    [[TMP66:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV17:%.*]] = bitcast i64* [[CH_CASTED]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP66]], i32* [[CONV17]], align 4
-// CHECK9-NEXT:    [[TMP67:%.*]] = load i64, i64* [[CH_CASTED]], align 8
-// CHECK9-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV19:%.*]] = bitcast i64* [[N_CASTED18]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP68]], i32* [[CONV19]], align 4
-// CHECK9-NEXT:    [[TMP69:%.*]] = load i64, i64* [[N_CASTED18]], align 8
-// CHECK9-NEXT:    [[TMP70:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP71:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP72:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP74]], align 8
-// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP76]], align 8
-// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP77]], align 8
-// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP69]], i64* [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP69]], i64* [[TMP81]], align 8
-// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP82]], align 8
-// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP70]], i32** [[TMP84]], align 8
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP70]], i32** [[TMP86]], align 8
-// CHECK9-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP87]], align 8
-// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP71]], i32** [[TMP89]], align 8
-// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP71]], i32** [[TMP91]], align 8
-// CHECK9-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP72]], i32** [[TMP94]], align 8
-// CHECK9-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP72]], i32** [[TMP96]], align 8
-// CHECK9-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP97]], align 8
-// CHECK9-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP100]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK9-NEXT:    [[TMP101:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK9-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP101]], 0
-// CHECK9-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
-// CHECK9-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
-// CHECK9-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[TMP102:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP102]], 1
-// CHECK9-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP103]])
-// CHECK9-NEXT:    [[TMP104:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l59.region_id, i32 5, i8** [[TMP98]], i8** [[TMP99]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP105:%.*]] = icmp ne i32 [[TMP104]], 0
-// CHECK9-NEXT:    br i1 [[TMP105]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
-// CHECK9:       omp_offload.failed30:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l59(i64 [[TMP67]], i64 [[TMP69]], i32* [[TMP70]], i32* [[TMP71]], i32* [[TMP72]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
-// CHECK9:       omp_offload.cont31:
-// CHECK9-NEXT:    [[TMP106:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV33:%.*]] = bitcast i64* [[N_CASTED32]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP106]], i32* [[CONV33]], align 4
-// CHECK9-NEXT:    [[TMP107:%.*]] = load i64, i64* [[N_CASTED32]], align 8
-// CHECK9-NEXT:    [[TMP108:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP109:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP110:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP107]], i64* [[TMP112]], align 8
-// CHECK9-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP107]], i64* [[TMP114]], align 8
-// CHECK9-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK9-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP108]], i32** [[TMP117]], align 8
-// CHECK9-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP108]], i32** [[TMP119]], align 8
-// CHECK9-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP120]], align 8
-// CHECK9-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP109]], i32** [[TMP122]], align 8
-// CHECK9-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP109]], i32** [[TMP124]], align 8
-// CHECK9-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP125]], align 8
-// CHECK9-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP110]], i32** [[TMP127]], align 8
-// CHECK9-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP110]], i32** [[TMP129]], align 8
-// CHECK9-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK9-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK9-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK9-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP134]], 0
-// CHECK9-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
-// CHECK9-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
-// CHECK9-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK9-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK9-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP135]], 1
-// CHECK9-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP136]])
-// CHECK9-NEXT:    [[TMP137:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l67.region_id, i32 4, i8** [[TMP131]], i8** [[TMP132]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.40, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.41, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP138:%.*]] = icmp ne i32 [[TMP137]], 0
-// CHECK9-NEXT:    br i1 [[TMP138]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
-// CHECK9:       omp_offload.failed44:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l67(i64 [[TMP107]], i32* [[TMP108]], i32* [[TMP109]], i32* [[TMP110]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
-// CHECK9:       omp_offload.cont45:
-// CHECK9-NEXT:    [[TMP139:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV47:%.*]] = bitcast i64* [[CH_CASTED46]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP139]], i32* [[CONV47]], align 4
-// CHECK9-NEXT:    [[TMP140:%.*]] = load i64, i64* [[CH_CASTED46]], align 8
-// CHECK9-NEXT:    [[TMP141:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV49:%.*]] = bitcast i64* [[N_CASTED48]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP141]], i32* [[CONV49]], align 4
-// CHECK9-NEXT:    [[TMP142:%.*]] = load i64, i64* [[N_CASTED48]], align 8
-// CHECK9-NEXT:    [[TMP143:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP144:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP145:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP140]], i64* [[TMP147]], align 8
-// CHECK9-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP140]], i64* [[TMP149]], align 8
-// CHECK9-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP150]], align 8
-// CHECK9-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP142]], i64* [[TMP152]], align 8
-// CHECK9-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP142]], i64* [[TMP154]], align 8
-// CHECK9-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK9-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP143]], i32** [[TMP157]], align 8
-// CHECK9-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP143]], i32** [[TMP159]], align 8
-// CHECK9-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP160]], align 8
-// CHECK9-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP144]], i32** [[TMP162]], align 8
-// CHECK9-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP144]], i32** [[TMP164]], align 8
-// CHECK9-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP165]], align 8
-// CHECK9-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP167:%.*]] = bitcast i8** [[TMP166]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP145]], i32** [[TMP167]], align 8
-// CHECK9-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP169:%.*]] = bitcast i8** [[TMP168]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP145]], i32** [[TMP169]], align 8
-// CHECK9-NEXT:    [[TMP170:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP170]], align 8
-// CHECK9-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP70]], 1
+// CHECK9-NEXT:    [[TMP71:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP71]])
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP72]], align 4
+// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP73]], align 4
+// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP66]], i8*** [[TMP74]], align 8
+// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP67]], i8*** [[TMP75]], align 8
+// CHECK9-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.32, i32 0, i32 0), i64** [[TMP76]], align 8
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.33, i32 0, i32 0), i64** [[TMP77]], align 8
+// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP78]], align 8
+// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP79]], align 8
+// CHECK9-NEXT:    [[TMP80:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l51.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]])
+// CHECK9-NEXT:    [[TMP81:%.*]] = icmp ne i32 [[TMP80]], 0
+// CHECK9-NEXT:    br i1 [[TMP81]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9:       omp_offload.failed16:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l51(i64 [[TMP42]], i32* [[TMP43]], i32* [[TMP44]], i32* [[TMP45]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
+// CHECK9:       omp_offload.cont17:
+// CHECK9-NEXT:    [[TMP82:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV18:%.*]] = bitcast i64* [[CH_CASTED]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP82]], i32* [[CONV18]], align 4
+// CHECK9-NEXT:    [[TMP83:%.*]] = load i64, i64* [[CH_CASTED]], align 8
+// CHECK9-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP84]], i32* [[CONV20]], align 4
+// CHECK9-NEXT:    [[TMP85:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK9-NEXT:    [[TMP86:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP87:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP88:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP83]], i64* [[TMP90]], align 8
+// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP83]], i64* [[TMP92]], align 8
+// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP93]], align 8
+// CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP85]], i64* [[TMP95]], align 8
+// CHECK9-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP85]], i64* [[TMP97]], align 8
+// CHECK9-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP98]], align 8
+// CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP86]], i32** [[TMP100]], align 8
+// CHECK9-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP86]], i32** [[TMP102]], align 8
+// CHECK9-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP103]], align 8
+// CHECK9-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP87]], i32** [[TMP105]], align 8
+// CHECK9-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP87]], i32** [[TMP107]], align 8
+// CHECK9-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP108]], align 8
+// CHECK9-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP88]], i32** [[TMP110]], align 8
+// CHECK9-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP88]], i32** [[TMP112]], align 8
+// CHECK9-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP113]], align 8
+// CHECK9-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP116:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP116]], i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[TMP117:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP117]], 0
+// CHECK9-NEXT:    [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[DIV28]], 1
+// CHECK9-NEXT:    store i32 [[SUB29]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[TMP118:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP118]], 1
+// CHECK9-NEXT:    [[TMP119:%.*]] = zext i32 [[ADD30]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP119]])
+// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP120]], align 4
+// CHECK9-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP121]], align 4
+// CHECK9-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP114]], i8*** [[TMP122]], align 8
+// CHECK9-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP115]], i8*** [[TMP123]], align 8
+// CHECK9-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.36, i32 0, i32 0), i64** [[TMP124]], align 8
+// CHECK9-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.37, i32 0, i32 0), i64** [[TMP125]], align 8
+// CHECK9-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP126]], align 8
+// CHECK9-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP127]], align 8
+// CHECK9-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l59.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]])
+// CHECK9-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK9-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK9:       omp_offload.failed32:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l59(i64 [[TMP83]], i64 [[TMP85]], i32* [[TMP86]], i32* [[TMP87]], i32* [[TMP88]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
+// CHECK9:       omp_offload.cont33:
+// CHECK9-NEXT:    [[TMP130:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP130]], i32* [[CONV35]], align 4
+// CHECK9-NEXT:    [[TMP131:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK9-NEXT:    [[TMP132:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP133:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP134:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP131]], i64* [[TMP136]], align 8
+// CHECK9-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP131]], i64* [[TMP138]], align 8
+// CHECK9-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP139]], align 8
+// CHECK9-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP132]], i32** [[TMP141]], align 8
+// CHECK9-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP132]], i32** [[TMP143]], align 8
+// CHECK9-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP144]], align 8
+// CHECK9-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP133]], i32** [[TMP146]], align 8
+// CHECK9-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP133]], i32** [[TMP148]], align 8
+// CHECK9-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP149]], align 8
+// CHECK9-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP134]], i32** [[TMP151]], align 8
+// CHECK9-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP134]], i32** [[TMP153]], align 8
+// CHECK9-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK9-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP157:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP157]], i32* [[DOTCAPTURE_EXPR_40]], align 4
+// CHECK9-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_40]], align 4
+// CHECK9-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[TMP158]], 0
+// CHECK9-NEXT:    [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1
+// CHECK9-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[DIV43]], 1
+// CHECK9-NEXT:    store i32 [[SUB44]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK9-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK9-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP159]], 1
+// CHECK9-NEXT:    [[TMP160:%.*]] = zext i32 [[ADD45]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP160]])
+// CHECK9-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP161]], align 4
+// CHECK9-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP162]], align 4
+// CHECK9-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP155]], i8*** [[TMP163]], align 8
+// CHECK9-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP156]], i8*** [[TMP164]], align 8
+// CHECK9-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.40, i32 0, i32 0), i64** [[TMP165]], align 8
+// CHECK9-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.41, i32 0, i32 0), i64** [[TMP166]], align 8
+// CHECK9-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP167]], align 8
+// CHECK9-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP168]], align 8
+// CHECK9-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l67.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]])
+// CHECK9-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
+// CHECK9-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK9:       omp_offload.failed47:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l67(i64 [[TMP131]], i32* [[TMP132]], i32* [[TMP133]], i32* [[TMP134]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
+// CHECK9:       omp_offload.cont48:
+// CHECK9-NEXT:    [[TMP171:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV50:%.*]] = bitcast i64* [[CH_CASTED49]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP171]], i32* [[CONV50]], align 4
+// CHECK9-NEXT:    [[TMP172:%.*]] = load i64, i64* [[CH_CASTED49]], align 8
 // CHECK9-NEXT:    [[TMP173:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP173]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK9-NEXT:    [[TMP174:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK9-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP174]], 0
-// CHECK9-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
-// CHECK9-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
-// CHECK9-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK9-NEXT:    [[TMP175:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK9-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP175]], 1
-// CHECK9-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP176]])
-// CHECK9-NEXT:    [[TMP177:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, i32 5, i8** [[TMP171]], i8** [[TMP172]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.44, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.45, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP178:%.*]] = icmp ne i32 [[TMP177]], 0
-// CHECK9-NEXT:    br i1 [[TMP178]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
-// CHECK9:       omp_offload.failed60:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i64 [[TMP140]], i64 [[TMP142]], i32* [[TMP143]], i32* [[TMP144]], i32* [[TMP145]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
-// CHECK9:       omp_offload.cont61:
-// CHECK9-NEXT:    [[TMP179:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV63:%.*]] = bitcast i64* [[N_CASTED62]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP179]], i32* [[CONV63]], align 4
-// CHECK9-NEXT:    [[TMP180:%.*]] = load i64, i64* [[N_CASTED62]], align 8
-// CHECK9-NEXT:    [[TMP181:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP182:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP183:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP184:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP185:%.*]] = bitcast i8** [[TMP184]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP180]], i64* [[TMP185]], align 8
-// CHECK9-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP187:%.*]] = bitcast i8** [[TMP186]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP180]], i64* [[TMP187]], align 8
-// CHECK9-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP188]], align 8
-// CHECK9-NEXT:    [[TMP189:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP190:%.*]] = bitcast i8** [[TMP189]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP181]], i32** [[TMP190]], align 8
-// CHECK9-NEXT:    [[TMP191:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP192:%.*]] = bitcast i8** [[TMP191]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP181]], i32** [[TMP192]], align 8
-// CHECK9-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP193]], align 8
-// CHECK9-NEXT:    [[TMP194:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP195:%.*]] = bitcast i8** [[TMP194]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP182]], i32** [[TMP195]], align 8
-// CHECK9-NEXT:    [[TMP196:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP197:%.*]] = bitcast i8** [[TMP196]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP182]], i32** [[TMP197]], align 8
-// CHECK9-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP198]], align 8
-// CHECK9-NEXT:    [[TMP199:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP200:%.*]] = bitcast i8** [[TMP199]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP183]], i32** [[TMP200]], align 8
-// CHECK9-NEXT:    [[TMP201:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP202:%.*]] = bitcast i8** [[TMP201]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP183]], i32** [[TMP202]], align 8
-// CHECK9-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP203]], align 8
-// CHECK9-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP205:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP206:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP206]], i32* [[DOTCAPTURE_EXPR_68]], align 4
-// CHECK9-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_68]], align 4
-// CHECK9-NEXT:    [[SUB70:%.*]] = sub nsw i32 [[TMP207]], 0
-// CHECK9-NEXT:    [[DIV71:%.*]] = sdiv i32 [[SUB70]], 1
-// CHECK9-NEXT:    [[SUB72:%.*]] = sub nsw i32 [[DIV71]], 1
-// CHECK9-NEXT:    store i32 [[SUB72]], i32* [[DOTCAPTURE_EXPR_69]], align 4
-// CHECK9-NEXT:    [[TMP208:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_69]], align 4
-// CHECK9-NEXT:    [[ADD73:%.*]] = add nsw i32 [[TMP208]], 1
-// CHECK9-NEXT:    [[TMP209:%.*]] = zext i32 [[ADD73]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP209]])
-// CHECK9-NEXT:    [[TMP210:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l83.region_id, i32 4, i8** [[TMP204]], i8** [[TMP205]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.48, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.49, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP211:%.*]] = icmp ne i32 [[TMP210]], 0
-// CHECK9-NEXT:    br i1 [[TMP211]], label [[OMP_OFFLOAD_FAILED74:%.*]], label [[OMP_OFFLOAD_CONT75:%.*]]
-// CHECK9:       omp_offload.failed74:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l83(i64 [[TMP180]], i32* [[TMP181]], i32* [[TMP182]], i32* [[TMP183]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT75]]
-// CHECK9:       omp_offload.cont75:
-// CHECK9-NEXT:    [[TMP212:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV77:%.*]] = bitcast i64* [[CH_CASTED76]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP212]], i32* [[CONV77]], align 4
-// CHECK9-NEXT:    [[TMP213:%.*]] = load i64, i64* [[CH_CASTED76]], align 8
-// CHECK9-NEXT:    [[TMP214:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV79:%.*]] = bitcast i64* [[N_CASTED78]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP214]], i32* [[CONV79]], align 4
-// CHECK9-NEXT:    [[TMP215:%.*]] = load i64, i64* [[N_CASTED78]], align 8
-// CHECK9-NEXT:    [[TMP216:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP217:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP218:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP219:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP220:%.*]] = bitcast i8** [[TMP219]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP213]], i64* [[TMP220]], align 8
-// CHECK9-NEXT:    [[TMP221:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP222:%.*]] = bitcast i8** [[TMP221]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP213]], i64* [[TMP222]], align 8
-// CHECK9-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP223]], align 8
-// CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 1
+// CHECK9-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP173]], i32* [[CONV52]], align 4
+// CHECK9-NEXT:    [[TMP174:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK9-NEXT:    [[TMP175:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP176:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP177:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP179:%.*]] = bitcast i8** [[TMP178]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP172]], i64* [[TMP179]], align 8
+// CHECK9-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP181:%.*]] = bitcast i8** [[TMP180]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP172]], i64* [[TMP181]], align 8
+// CHECK9-NEXT:    [[TMP182:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP182]], align 8
+// CHECK9-NEXT:    [[TMP183:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP184:%.*]] = bitcast i8** [[TMP183]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP174]], i64* [[TMP184]], align 8
+// CHECK9-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP186:%.*]] = bitcast i8** [[TMP185]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP174]], i64* [[TMP186]], align 8
+// CHECK9-NEXT:    [[TMP187:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP187]], align 8
+// CHECK9-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP189:%.*]] = bitcast i8** [[TMP188]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP175]], i32** [[TMP189]], align 8
+// CHECK9-NEXT:    [[TMP190:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP191:%.*]] = bitcast i8** [[TMP190]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP175]], i32** [[TMP191]], align 8
+// CHECK9-NEXT:    [[TMP192:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP192]], align 8
+// CHECK9-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP194:%.*]] = bitcast i8** [[TMP193]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP176]], i32** [[TMP194]], align 8
+// CHECK9-NEXT:    [[TMP195:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP196:%.*]] = bitcast i8** [[TMP195]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP176]], i32** [[TMP196]], align 8
+// CHECK9-NEXT:    [[TMP197:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP197]], align 8
+// CHECK9-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP199:%.*]] = bitcast i8** [[TMP198]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP177]], i32** [[TMP199]], align 8
+// CHECK9-NEXT:    [[TMP200:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP201:%.*]] = bitcast i8** [[TMP200]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP177]], i32** [[TMP201]], align 8
+// CHECK9-NEXT:    [[TMP202:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP202]], align 8
+// CHECK9-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP205:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP205]], i32* [[DOTCAPTURE_EXPR_57]], align 4
+// CHECK9-NEXT:    [[TMP206:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_57]], align 4
+// CHECK9-NEXT:    [[SUB59:%.*]] = sub nsw i32 [[TMP206]], 0
+// CHECK9-NEXT:    [[DIV60:%.*]] = sdiv i32 [[SUB59]], 1
+// CHECK9-NEXT:    [[SUB61:%.*]] = sub nsw i32 [[DIV60]], 1
+// CHECK9-NEXT:    store i32 [[SUB61]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK9-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK9-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP207]], 1
+// CHECK9-NEXT:    [[TMP208:%.*]] = zext i32 [[ADD62]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP208]])
+// CHECK9-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP209:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP209]], align 4
+// CHECK9-NEXT:    [[TMP210:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP210]], align 4
+// CHECK9-NEXT:    [[TMP211:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP203]], i8*** [[TMP211]], align 8
+// CHECK9-NEXT:    [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP204]], i8*** [[TMP212]], align 8
+// CHECK9-NEXT:    [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.44, i32 0, i32 0), i64** [[TMP213]], align 8
+// CHECK9-NEXT:    [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.45, i32 0, i32 0), i64** [[TMP214]], align 8
+// CHECK9-NEXT:    [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP215]], align 8
+// CHECK9-NEXT:    [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP216]], align 8
+// CHECK9-NEXT:    [[TMP217:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]])
+// CHECK9-NEXT:    [[TMP218:%.*]] = icmp ne i32 [[TMP217]], 0
+// CHECK9-NEXT:    br i1 [[TMP218]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK9:       omp_offload.failed64:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i64 [[TMP172]], i64 [[TMP174]], i32* [[TMP175]], i32* [[TMP176]], i32* [[TMP177]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
+// CHECK9:       omp_offload.cont65:
+// CHECK9-NEXT:    [[TMP219:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV67:%.*]] = bitcast i64* [[N_CASTED66]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP219]], i32* [[CONV67]], align 4
+// CHECK9-NEXT:    [[TMP220:%.*]] = load i64, i64* [[N_CASTED66]], align 8
+// CHECK9-NEXT:    [[TMP221:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP222:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP223:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP225:%.*]] = bitcast i8** [[TMP224]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP215]], i64* [[TMP225]], align 8
-// CHECK9-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 1
+// CHECK9-NEXT:    store i64 [[TMP220]], i64* [[TMP225]], align 8
+// CHECK9-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP227:%.*]] = bitcast i8** [[TMP226]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP215]], i64* [[TMP227]], align 8
-// CHECK9-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 1
+// CHECK9-NEXT:    store i64 [[TMP220]], i64* [[TMP227]], align 8
+// CHECK9-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP228]], align 8
-// CHECK9-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP230:%.*]] = bitcast i8** [[TMP229]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP216]], i32** [[TMP230]], align 8
-// CHECK9-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 2
+// CHECK9-NEXT:    store i32* [[TMP221]], i32** [[TMP230]], align 8
+// CHECK9-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP232:%.*]] = bitcast i8** [[TMP231]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP216]], i32** [[TMP232]], align 8
-// CHECK9-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 2
+// CHECK9-NEXT:    store i32* [[TMP221]], i32** [[TMP232]], align 8
+// CHECK9-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 1
 // CHECK9-NEXT:    store i8* null, i8** [[TMP233]], align 8
-// CHECK9-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP235:%.*]] = bitcast i8** [[TMP234]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP217]], i32** [[TMP235]], align 8
-// CHECK9-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 3
+// CHECK9-NEXT:    store i32* [[TMP222]], i32** [[TMP235]], align 8
+// CHECK9-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP237:%.*]] = bitcast i8** [[TMP236]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP217]], i32** [[TMP237]], align 8
-// CHECK9-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 3
+// CHECK9-NEXT:    store i32* [[TMP222]], i32** [[TMP237]], align 8
+// CHECK9-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 2
 // CHECK9-NEXT:    store i8* null, i8** [[TMP238]], align 8
-// CHECK9-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP240:%.*]] = bitcast i8** [[TMP239]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP218]], i32** [[TMP240]], align 8
-// CHECK9-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 4
+// CHECK9-NEXT:    store i32* [[TMP223]], i32** [[TMP240]], align 8
+// CHECK9-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP242:%.*]] = bitcast i8** [[TMP241]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP218]], i32** [[TMP242]], align 8
-// CHECK9-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 4
+// CHECK9-NEXT:    store i32* [[TMP223]], i32** [[TMP242]], align 8
+// CHECK9-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 3
 // CHECK9-NEXT:    store i8* null, i8** [[TMP243]], align 8
-// CHECK9-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP246:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_84]], align 4
-// CHECK9-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_84]], align 4
-// CHECK9-NEXT:    [[SUB86:%.*]] = sub nsw i32 [[TMP247]], 0
-// CHECK9-NEXT:    [[DIV87:%.*]] = sdiv i32 [[SUB86]], 1
-// CHECK9-NEXT:    [[SUB88:%.*]] = sub nsw i32 [[DIV87]], 1
-// CHECK9-NEXT:    store i32 [[SUB88]], i32* [[DOTCAPTURE_EXPR_85]], align 4
-// CHECK9-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_85]], align 4
-// CHECK9-NEXT:    [[ADD89:%.*]] = add nsw i32 [[TMP248]], 1
-// CHECK9-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD89]] to i64
+// CHECK9-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_72]], align 4
+// CHECK9-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_72]], align 4
+// CHECK9-NEXT:    [[SUB74:%.*]] = sub nsw i32 [[TMP247]], 0
+// CHECK9-NEXT:    [[DIV75:%.*]] = sdiv i32 [[SUB74]], 1
+// CHECK9-NEXT:    [[SUB76:%.*]] = sub nsw i32 [[DIV75]], 1
+// CHECK9-NEXT:    store i32 [[SUB76]], i32* [[DOTCAPTURE_EXPR_73]], align 4
+// CHECK9-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_73]], align 4
+// CHECK9-NEXT:    [[ADD77:%.*]] = add nsw i32 [[TMP248]], 1
+// CHECK9-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD77]] to i64
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP249]])
-// CHECK9-NEXT:    [[TMP250:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l91.region_id, i32 5, i8** [[TMP244]], i8** [[TMP245]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.52, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.53, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP251:%.*]] = icmp ne i32 [[TMP250]], 0
-// CHECK9-NEXT:    br i1 [[TMP251]], label [[OMP_OFFLOAD_FAILED90:%.*]], label [[OMP_OFFLOAD_CONT91:%.*]]
-// CHECK9:       omp_offload.failed90:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l91(i64 [[TMP213]], i64 [[TMP215]], i32* [[TMP216]], i32* [[TMP217]], i32* [[TMP218]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT91]]
-// CHECK9:       omp_offload.cont91:
+// CHECK9-NEXT:    [[KERNEL_ARGS78:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP250:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP250]], align 4
+// CHECK9-NEXT:    [[TMP251:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP251]], align 4
+// CHECK9-NEXT:    [[TMP252:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP244]], i8*** [[TMP252]], align 8
+// CHECK9-NEXT:    [[TMP253:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP245]], i8*** [[TMP253]], align 8
+// CHECK9-NEXT:    [[TMP254:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.48, i32 0, i32 0), i64** [[TMP254]], align 8
+// CHECK9-NEXT:    [[TMP255:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.49, i32 0, i32 0), i64** [[TMP255]], align 8
+// CHECK9-NEXT:    [[TMP256:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP256]], align 8
+// CHECK9-NEXT:    [[TMP257:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP257]], align 8
+// CHECK9-NEXT:    [[TMP258:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l83.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]])
+// CHECK9-NEXT:    [[TMP259:%.*]] = icmp ne i32 [[TMP258]], 0
+// CHECK9-NEXT:    br i1 [[TMP259]], label [[OMP_OFFLOAD_FAILED79:%.*]], label [[OMP_OFFLOAD_CONT80:%.*]]
+// CHECK9:       omp_offload.failed79:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l83(i64 [[TMP220]], i32* [[TMP221]], i32* [[TMP222]], i32* [[TMP223]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT80]]
+// CHECK9:       omp_offload.cont80:
+// CHECK9-NEXT:    [[TMP260:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV82:%.*]] = bitcast i64* [[CH_CASTED81]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP260]], i32* [[CONV82]], align 4
+// CHECK9-NEXT:    [[TMP261:%.*]] = load i64, i64* [[CH_CASTED81]], align 8
+// CHECK9-NEXT:    [[TMP262:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV84:%.*]] = bitcast i64* [[N_CASTED83]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP262]], i32* [[CONV84]], align 4
+// CHECK9-NEXT:    [[TMP263:%.*]] = load i64, i64* [[N_CASTED83]], align 8
+// CHECK9-NEXT:    [[TMP264:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP265:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP266:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP268:%.*]] = bitcast i8** [[TMP267]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP261]], i64* [[TMP268]], align 8
+// CHECK9-NEXT:    [[TMP269:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP270:%.*]] = bitcast i8** [[TMP269]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP261]], i64* [[TMP270]], align 8
+// CHECK9-NEXT:    [[TMP271:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP271]], align 8
+// CHECK9-NEXT:    [[TMP272:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP273:%.*]] = bitcast i8** [[TMP272]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP263]], i64* [[TMP273]], align 8
+// CHECK9-NEXT:    [[TMP274:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP275:%.*]] = bitcast i8** [[TMP274]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP263]], i64* [[TMP275]], align 8
+// CHECK9-NEXT:    [[TMP276:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP276]], align 8
+// CHECK9-NEXT:    [[TMP277:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP278:%.*]] = bitcast i8** [[TMP277]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP264]], i32** [[TMP278]], align 8
+// CHECK9-NEXT:    [[TMP279:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP280:%.*]] = bitcast i8** [[TMP279]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP264]], i32** [[TMP280]], align 8
+// CHECK9-NEXT:    [[TMP281:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP281]], align 8
+// CHECK9-NEXT:    [[TMP282:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP283:%.*]] = bitcast i8** [[TMP282]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP265]], i32** [[TMP283]], align 8
+// CHECK9-NEXT:    [[TMP284:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP285:%.*]] = bitcast i8** [[TMP284]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP265]], i32** [[TMP285]], align 8
+// CHECK9-NEXT:    [[TMP286:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP286]], align 8
+// CHECK9-NEXT:    [[TMP287:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP288:%.*]] = bitcast i8** [[TMP287]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP266]], i32** [[TMP288]], align 8
+// CHECK9-NEXT:    [[TMP289:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP290:%.*]] = bitcast i8** [[TMP289]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP266]], i32** [[TMP290]], align 8
+// CHECK9-NEXT:    [[TMP291:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP291]], align 8
+// CHECK9-NEXT:    [[TMP292:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP293:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP294:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP294]], i32* [[DOTCAPTURE_EXPR_89]], align 4
+// CHECK9-NEXT:    [[TMP295:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_89]], align 4
+// CHECK9-NEXT:    [[SUB91:%.*]] = sub nsw i32 [[TMP295]], 0
+// CHECK9-NEXT:    [[DIV92:%.*]] = sdiv i32 [[SUB91]], 1
+// CHECK9-NEXT:    [[SUB93:%.*]] = sub nsw i32 [[DIV92]], 1
+// CHECK9-NEXT:    store i32 [[SUB93]], i32* [[DOTCAPTURE_EXPR_90]], align 4
+// CHECK9-NEXT:    [[TMP296:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_90]], align 4
+// CHECK9-NEXT:    [[ADD94:%.*]] = add nsw i32 [[TMP296]], 1
+// CHECK9-NEXT:    [[TMP297:%.*]] = zext i32 [[ADD94]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP297]])
+// CHECK9-NEXT:    [[KERNEL_ARGS95:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP298:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP298]], align 4
+// CHECK9-NEXT:    [[TMP299:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP299]], align 4
+// CHECK9-NEXT:    [[TMP300:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP292]], i8*** [[TMP300]], align 8
+// CHECK9-NEXT:    [[TMP301:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP293]], i8*** [[TMP301]], align 8
+// CHECK9-NEXT:    [[TMP302:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.52, i32 0, i32 0), i64** [[TMP302]], align 8
+// CHECK9-NEXT:    [[TMP303:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.53, i32 0, i32 0), i64** [[TMP303]], align 8
+// CHECK9-NEXT:    [[TMP304:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP304]], align 8
+// CHECK9-NEXT:    [[TMP305:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP305]], align 8
+// CHECK9-NEXT:    [[TMP306:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l91.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]])
+// CHECK9-NEXT:    [[TMP307:%.*]] = icmp ne i32 [[TMP306]], 0
+// CHECK9-NEXT:    br i1 [[TMP307]], label [[OMP_OFFLOAD_FAILED96:%.*]], label [[OMP_OFFLOAD_CONT97:%.*]]
+// CHECK9:       omp_offload.failed96:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l91(i64 [[TMP261]], i64 [[TMP263]], i32* [[TMP264]], i32* [[TMP265]], i32* [[TMP266]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT97]]
+// CHECK9:       omp_offload.cont97:
 // CHECK9-NEXT:    ret i32 0
 //
 //
@@ -8739,43 +8977,43 @@ int main() {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[CH_CASTED:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED16:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS17:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS18:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS19:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP20:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED17:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS19:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS20:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED29:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS30:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS31:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS32:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP33:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_35:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[CH_CASTED42:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED43:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS44:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS45:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS46:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP47:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_48:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED56:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS57:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS58:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS59:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP60:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_61:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_62:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[CH_CASTED69:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED70:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS71:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS72:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS73:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP74:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_75:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_76:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED31:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS34:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[CH_CASTED45:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED46:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS48:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS49:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED60:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS63:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[CH_CASTED74:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED75:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS77:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS78:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 10000, i32* [[N]], align 4
 // CHECK11-NEXT:    store i32 100, i32* [[CH]], align 4
@@ -8830,394 +9068,513 @@ int main() {
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
 // CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l369.region_id, i32 4, i8** [[TMP25]], i8** [[TMP26]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP32]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP25]], i8*** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP26]], i8*** [[TMP34]], align 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP38]], align 4
+// CHECK11-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l369.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
+// CHECK11-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l369(i32 [[TMP1]], double* [[TMP2]], double* [[TMP3]], double* [[TMP4]]) #[[ATTR2:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to double**
-// CHECK11-NEXT:    store double* [[TMP35]], double** [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to double**
-// CHECK11-NEXT:    store double* [[TMP35]], double** [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to double**
-// CHECK11-NEXT:    store double* [[TMP36]], double** [[TMP49]], align 4
-// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to double**
-// CHECK11-NEXT:    store double* [[TMP36]], double** [[TMP51]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP45:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[TMP47]], align 4
+// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[TMP49]], align 4
+// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP50]], align 4
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to double**
+// CHECK11-NEXT:    store double* [[TMP43]], double** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to double**
-// CHECK11-NEXT:    store double* [[TMP37]], double** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP56:%.*]] = bitcast i8** [[TMP55]] to double**
-// CHECK11-NEXT:    store double* [[TMP37]], double** [[TMP56]], align 4
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP57]], align 4
-// CHECK11-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP60]], i32* [[DOTCAPTURE_EXPR_8]], align 4
-// CHECK11-NEXT:    [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_8]], align 4
-// CHECK11-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[TMP61]], 0
+// CHECK11-NEXT:    store double* [[TMP43]], double** [[TMP54]], align 4
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP55]], align 4
+// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to double**
+// CHECK11-NEXT:    store double* [[TMP44]], double** [[TMP57]], align 4
+// CHECK11-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to double**
+// CHECK11-NEXT:    store double* [[TMP44]], double** [[TMP59]], align 4
+// CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP60]], align 4
+// CHECK11-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to double**
+// CHECK11-NEXT:    store double* [[TMP45]], double** [[TMP62]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to double**
+// CHECK11-NEXT:    store double* [[TMP45]], double** [[TMP64]], align 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP65]], align 4
+// CHECK11-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[DOTCAPTURE_EXPR_8]], align 4
+// CHECK11-NEXT:    [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_8]], align 4
+// CHECK11-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[TMP69]], 0
 // CHECK11-NEXT:    [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
 // CHECK11-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[DIV11]], 1
 // CHECK11-NEXT:    store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP62]], 1
-// CHECK11-NEXT:    [[TMP63:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP63]])
-// CHECK11-NEXT:    [[TMP64:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l408.region_id, i32 4, i8** [[TMP58]], i8** [[TMP59]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP65:%.*]] = icmp ne i32 [[TMP64]], 0
-// CHECK11-NEXT:    br i1 [[TMP65]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
-// CHECK11:       omp_offload.failed14:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l408(i32 [[TMP34]], double* [[TMP35]], double* [[TMP36]], double* [[TMP37]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
-// CHECK11:       omp_offload.cont15:
-// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[CH_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = load i32, i32* [[CH_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[N_CASTED16]], align 4
-// CHECK11-NEXT:    [[TMP69:%.*]] = load i32, i32* [[N_CASTED16]], align 4
-// CHECK11-NEXT:    [[TMP70:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP71:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP72:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP74]], align 4
-// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP76]], align 4
-// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP77]], align 4
-// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP69]], i32* [[TMP79]], align 4
-// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP69]], i32* [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to double**
-// CHECK11-NEXT:    store double* [[TMP70]], double** [[TMP84]], align 4
-// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to double**
-// CHECK11-NEXT:    store double* [[TMP70]], double** [[TMP86]], align 4
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
-// CHECK11-NEXT:    store double* [[TMP71]], double** [[TMP89]], align 4
-// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to double**
-// CHECK11-NEXT:    store double* [[TMP71]], double** [[TMP91]], align 4
-// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP92]], align 4
-// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK11-NEXT:    store double* [[TMP72]], double** [[TMP94]], align 4
-// CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to double**
-// CHECK11-NEXT:    store double* [[TMP72]], double** [[TMP96]], align 4
-// CHECK11-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP97]], align 4
-// CHECK11-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP100]], i32* [[DOTCAPTURE_EXPR_21]], align 4
-// CHECK11-NEXT:    [[TMP101:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
-// CHECK11-NEXT:    [[SUB23:%.*]] = sub nsw i32 [[TMP101]], 0
-// CHECK11-NEXT:    [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1
-// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[DIV24]], 1
-// CHECK11-NEXT:    store i32 [[SUB25]], i32* [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK11-NEXT:    [[TMP102:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK11-NEXT:    [[ADD26:%.*]] = add nsw i32 [[TMP102]], 1
-// CHECK11-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD26]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP103]])
-// CHECK11-NEXT:    [[TMP104:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l447.region_id, i32 5, i8** [[TMP98]], i8** [[TMP99]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP105:%.*]] = icmp ne i32 [[TMP104]], 0
-// CHECK11-NEXT:    br i1 [[TMP105]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
-// CHECK11:       omp_offload.failed27:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l447(i32 [[TMP67]], i32 [[TMP69]], double* [[TMP70]], double* [[TMP71]], double* [[TMP72]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT28]]
-// CHECK11:       omp_offload.cont28:
-// CHECK11-NEXT:    [[TMP106:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP106]], i32* [[N_CASTED29]], align 4
-// CHECK11-NEXT:    [[TMP107:%.*]] = load i32, i32* [[N_CASTED29]], align 4
-// CHECK11-NEXT:    [[TMP108:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP109:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP110:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP107]], i32* [[TMP112]], align 4
-// CHECK11-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP107]], i32* [[TMP114]], align 4
-// CHECK11-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP115]], align 4
-// CHECK11-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to double**
-// CHECK11-NEXT:    store double* [[TMP108]], double** [[TMP117]], align 4
-// CHECK11-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to double**
-// CHECK11-NEXT:    store double* [[TMP108]], double** [[TMP119]], align 4
-// CHECK11-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP120]], align 4
-// CHECK11-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to double**
-// CHECK11-NEXT:    store double* [[TMP109]], double** [[TMP122]], align 4
-// CHECK11-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to double**
-// CHECK11-NEXT:    store double* [[TMP109]], double** [[TMP124]], align 4
-// CHECK11-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK11-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
-// CHECK11-NEXT:    store double* [[TMP110]], double** [[TMP127]], align 4
-// CHECK11-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to double**
-// CHECK11-NEXT:    store double* [[TMP110]], double** [[TMP129]], align 4
-// CHECK11-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK11-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR_34]], align 4
-// CHECK11-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
-// CHECK11-NEXT:    [[SUB36:%.*]] = sub nsw i32 [[TMP134]], 0
-// CHECK11-NEXT:    [[DIV37:%.*]] = sdiv i32 [[SUB36]], 1
-// CHECK11-NEXT:    [[SUB38:%.*]] = sub nsw i32 [[DIV37]], 1
-// CHECK11-NEXT:    store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_35]], align 4
-// CHECK11-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_35]], align 4
-// CHECK11-NEXT:    [[ADD39:%.*]] = add nsw i32 [[TMP135]], 1
-// CHECK11-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD39]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP136]])
-// CHECK11-NEXT:    [[TMP137:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l478.region_id, i32 4, i8** [[TMP131]], i8** [[TMP132]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP138:%.*]] = icmp ne i32 [[TMP137]], 0
-// CHECK11-NEXT:    br i1 [[TMP138]], label [[OMP_OFFLOAD_FAILED40:%.*]], label [[OMP_OFFLOAD_CONT41:%.*]]
-// CHECK11:       omp_offload.failed40:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l478(i32 [[TMP107]], double* [[TMP108]], double* [[TMP109]], double* [[TMP110]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT41]]
-// CHECK11:       omp_offload.cont41:
-// CHECK11-NEXT:    [[TMP139:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP139]], i32* [[CH_CASTED42]], align 4
-// CHECK11-NEXT:    [[TMP140:%.*]] = load i32, i32* [[CH_CASTED42]], align 4
-// CHECK11-NEXT:    [[TMP141:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP141]], i32* [[N_CASTED43]], align 4
-// CHECK11-NEXT:    [[TMP142:%.*]] = load i32, i32* [[N_CASTED43]], align 4
-// CHECK11-NEXT:    [[TMP143:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP144:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP145:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP140]], i32* [[TMP147]], align 4
-// CHECK11-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP140]], i32* [[TMP149]], align 4
-// CHECK11-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP150]], align 4
-// CHECK11-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP142]], i32* [[TMP152]], align 4
-// CHECK11-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP142]], i32* [[TMP154]], align 4
-// CHECK11-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP155]], align 4
-// CHECK11-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to double**
-// CHECK11-NEXT:    store double* [[TMP143]], double** [[TMP157]], align 4
-// CHECK11-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to double**
-// CHECK11-NEXT:    store double* [[TMP143]], double** [[TMP159]], align 4
-// CHECK11-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP160]], align 4
-// CHECK11-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to double**
-// CHECK11-NEXT:    store double* [[TMP144]], double** [[TMP162]], align 4
-// CHECK11-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to double**
-// CHECK11-NEXT:    store double* [[TMP144]], double** [[TMP164]], align 4
-// CHECK11-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK11-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP167:%.*]] = bitcast i8** [[TMP166]] to double**
-// CHECK11-NEXT:    store double* [[TMP145]], double** [[TMP167]], align 4
-// CHECK11-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP169:%.*]] = bitcast i8** [[TMP168]] to double**
-// CHECK11-NEXT:    store double* [[TMP145]], double** [[TMP169]], align 4
-// CHECK11-NEXT:    [[TMP170:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP170]], align 4
-// CHECK11-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP70]], 1
+// CHECK11-NEXT:    [[TMP71:%.*]] = zext i32 [[ADD13]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP71]])
+// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP72]], align 4
+// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP66]], i8*** [[TMP74]], align 4
+// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP67]], i8*** [[TMP75]], align 4
+// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64** [[TMP76]], align 4
+// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i64** [[TMP77]], align 4
+// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP78]], align 4
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP79]], align 4
+// CHECK11-NEXT:    [[TMP80:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l408.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]])
+// CHECK11-NEXT:    [[TMP81:%.*]] = icmp ne i32 [[TMP80]], 0
+// CHECK11-NEXT:    br i1 [[TMP81]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11:       omp_offload.failed15:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l408(i32 [[TMP42]], double* [[TMP43]], double* [[TMP44]], double* [[TMP45]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
+// CHECK11:       omp_offload.cont16:
+// CHECK11-NEXT:    [[TMP82:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP82]], i32* [[CH_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP83:%.*]] = load i32, i32* [[CH_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP84]], i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP85:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP86:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP87:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP88:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP83]], i32* [[TMP90]], align 4
+// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP83]], i32* [[TMP92]], align 4
+// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP93]], align 4
+// CHECK11-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP85]], i32* [[TMP95]], align 4
+// CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP85]], i32* [[TMP97]], align 4
+// CHECK11-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP98]], align 4
+// CHECK11-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to double**
+// CHECK11-NEXT:    store double* [[TMP86]], double** [[TMP100]], align 4
+// CHECK11-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to double**
+// CHECK11-NEXT:    store double* [[TMP86]], double** [[TMP102]], align 4
+// CHECK11-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP103]], align 4
+// CHECK11-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to double**
+// CHECK11-NEXT:    store double* [[TMP87]], double** [[TMP105]], align 4
+// CHECK11-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to double**
+// CHECK11-NEXT:    store double* [[TMP87]], double** [[TMP107]], align 4
+// CHECK11-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK11-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to double**
+// CHECK11-NEXT:    store double* [[TMP88]], double** [[TMP110]], align 4
+// CHECK11-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to double**
+// CHECK11-NEXT:    store double* [[TMP88]], double** [[TMP112]], align 4
+// CHECK11-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK11-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP116:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP116]], i32* [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK11-NEXT:    [[TMP117:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK11-NEXT:    [[SUB24:%.*]] = sub nsw i32 [[TMP117]], 0
+// CHECK11-NEXT:    [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
+// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[DIV25]], 1
+// CHECK11-NEXT:    store i32 [[SUB26]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[TMP118:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP118]], 1
+// CHECK11-NEXT:    [[TMP119:%.*]] = zext i32 [[ADD27]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP119]])
+// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP120]], align 4
+// CHECK11-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP121]], align 4
+// CHECK11-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP114]], i8*** [[TMP122]], align 4
+// CHECK11-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP115]], i8*** [[TMP123]], align 4
+// CHECK11-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.8, i32 0, i32 0), i64** [[TMP124]], align 4
+// CHECK11-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.9, i32 0, i32 0), i64** [[TMP125]], align 4
+// CHECK11-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP126]], align 4
+// CHECK11-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP127]], align 4
+// CHECK11-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l447.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]])
+// CHECK11-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK11-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK11:       omp_offload.failed29:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l447(i32 [[TMP83]], i32 [[TMP85]], double* [[TMP86]], double* [[TMP87]], double* [[TMP88]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
+// CHECK11:       omp_offload.cont30:
+// CHECK11-NEXT:    [[TMP130:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP130]], i32* [[N_CASTED31]], align 4
+// CHECK11-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK11-NEXT:    [[TMP132:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP133:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP134:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP131]], i32* [[TMP136]], align 4
+// CHECK11-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP131]], i32* [[TMP138]], align 4
+// CHECK11-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP139]], align 4
+// CHECK11-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
+// CHECK11-NEXT:    store double* [[TMP132]], double** [[TMP141]], align 4
+// CHECK11-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
+// CHECK11-NEXT:    store double* [[TMP132]], double** [[TMP143]], align 4
+// CHECK11-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP144]], align 4
+// CHECK11-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to double**
+// CHECK11-NEXT:    store double* [[TMP133]], double** [[TMP146]], align 4
+// CHECK11-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to double**
+// CHECK11-NEXT:    store double* [[TMP133]], double** [[TMP148]], align 4
+// CHECK11-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP149]], align 4
+// CHECK11-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to double**
+// CHECK11-NEXT:    store double* [[TMP134]], double** [[TMP151]], align 4
+// CHECK11-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to double**
+// CHECK11-NEXT:    store double* [[TMP134]], double** [[TMP153]], align 4
+// CHECK11-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP154]], align 4
+// CHECK11-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP157:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP157]], i32* [[DOTCAPTURE_EXPR_36]], align 4
+// CHECK11-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_36]], align 4
+// CHECK11-NEXT:    [[SUB38:%.*]] = sub nsw i32 [[TMP158]], 0
+// CHECK11-NEXT:    [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1
+// CHECK11-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[DIV39]], 1
+// CHECK11-NEXT:    store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK11-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK11-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP159]], 1
+// CHECK11-NEXT:    [[TMP160:%.*]] = zext i32 [[ADD41]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP160]])
+// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP161]], align 4
+// CHECK11-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP162]], align 4
+// CHECK11-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP155]], i8*** [[TMP163]], align 4
+// CHECK11-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP156]], i8*** [[TMP164]], align 4
+// CHECK11-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64** [[TMP165]], align 4
+// CHECK11-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i64** [[TMP166]], align 4
+// CHECK11-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP167]], align 4
+// CHECK11-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP168]], align 4
+// CHECK11-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l478.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]])
+// CHECK11-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
+// CHECK11-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK11:       omp_offload.failed43:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l478(i32 [[TMP131]], double* [[TMP132]], double* [[TMP133]], double* [[TMP134]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
+// CHECK11:       omp_offload.cont44:
+// CHECK11-NEXT:    [[TMP171:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP171]], i32* [[CH_CASTED45]], align 4
+// CHECK11-NEXT:    [[TMP172:%.*]] = load i32, i32* [[CH_CASTED45]], align 4
 // CHECK11-NEXT:    [[TMP173:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP173]], i32* [[DOTCAPTURE_EXPR_48]], align 4
-// CHECK11-NEXT:    [[TMP174:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_48]], align 4
-// CHECK11-NEXT:    [[SUB50:%.*]] = sub nsw i32 [[TMP174]], 0
-// CHECK11-NEXT:    [[DIV51:%.*]] = sdiv i32 [[SUB50]], 1
-// CHECK11-NEXT:    [[SUB52:%.*]] = sub nsw i32 [[DIV51]], 1
-// CHECK11-NEXT:    store i32 [[SUB52]], i32* [[DOTCAPTURE_EXPR_49]], align 4
-// CHECK11-NEXT:    [[TMP175:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_49]], align 4
-// CHECK11-NEXT:    [[ADD53:%.*]] = add nsw i32 [[TMP175]], 1
-// CHECK11-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD53]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP176]])
-// CHECK11-NEXT:    [[TMP177:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l506.region_id, i32 5, i8** [[TMP171]], i8** [[TMP172]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP178:%.*]] = icmp ne i32 [[TMP177]], 0
-// CHECK11-NEXT:    br i1 [[TMP178]], label [[OMP_OFFLOAD_FAILED54:%.*]], label [[OMP_OFFLOAD_CONT55:%.*]]
-// CHECK11:       omp_offload.failed54:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l506(i32 [[TMP140]], i32 [[TMP142]], double* [[TMP143]], double* [[TMP144]], double* [[TMP145]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT55]]
-// CHECK11:       omp_offload.cont55:
-// CHECK11-NEXT:    [[TMP179:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP179]], i32* [[N_CASTED56]], align 4
-// CHECK11-NEXT:    [[TMP180:%.*]] = load i32, i32* [[N_CASTED56]], align 4
-// CHECK11-NEXT:    [[TMP181:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP182:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP183:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP184:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP185:%.*]] = bitcast i8** [[TMP184]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP180]], i32* [[TMP185]], align 4
-// CHECK11-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP187:%.*]] = bitcast i8** [[TMP186]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP180]], i32* [[TMP187]], align 4
-// CHECK11-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP188]], align 4
-// CHECK11-NEXT:    [[TMP189:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP190:%.*]] = bitcast i8** [[TMP189]] to double**
-// CHECK11-NEXT:    store double* [[TMP181]], double** [[TMP190]], align 4
-// CHECK11-NEXT:    [[TMP191:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP192:%.*]] = bitcast i8** [[TMP191]] to double**
-// CHECK11-NEXT:    store double* [[TMP181]], double** [[TMP192]], align 4
-// CHECK11-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP193]], align 4
-// CHECK11-NEXT:    [[TMP194:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP195:%.*]] = bitcast i8** [[TMP194]] to double**
-// CHECK11-NEXT:    store double* [[TMP182]], double** [[TMP195]], align 4
-// CHECK11-NEXT:    [[TMP196:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP197:%.*]] = bitcast i8** [[TMP196]] to double**
-// CHECK11-NEXT:    store double* [[TMP182]], double** [[TMP197]], align 4
-// CHECK11-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP198]], align 4
-// CHECK11-NEXT:    [[TMP199:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP200:%.*]] = bitcast i8** [[TMP199]] to double**
-// CHECK11-NEXT:    store double* [[TMP183]], double** [[TMP200]], align 4
-// CHECK11-NEXT:    [[TMP201:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP202:%.*]] = bitcast i8** [[TMP201]] to double**
-// CHECK11-NEXT:    store double* [[TMP183]], double** [[TMP202]], align 4
-// CHECK11-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP203]], align 4
-// CHECK11-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP205:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP206:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP206]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK11-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK11-NEXT:    [[SUB63:%.*]] = sub nsw i32 [[TMP207]], 0
-// CHECK11-NEXT:    [[DIV64:%.*]] = sdiv i32 [[SUB63]], 1
-// CHECK11-NEXT:    [[SUB65:%.*]] = sub nsw i32 [[DIV64]], 1
-// CHECK11-NEXT:    store i32 [[SUB65]], i32* [[DOTCAPTURE_EXPR_62]], align 4
-// CHECK11-NEXT:    [[TMP208:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_62]], align 4
-// CHECK11-NEXT:    [[ADD66:%.*]] = add nsw i32 [[TMP208]], 1
-// CHECK11-NEXT:    [[TMP209:%.*]] = zext i32 [[ADD66]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP209]])
-// CHECK11-NEXT:    [[TMP210:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l536.region_id, i32 4, i8** [[TMP204]], i8** [[TMP205]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP211:%.*]] = icmp ne i32 [[TMP210]], 0
-// CHECK11-NEXT:    br i1 [[TMP211]], label [[OMP_OFFLOAD_FAILED67:%.*]], label [[OMP_OFFLOAD_CONT68:%.*]]
-// CHECK11:       omp_offload.failed67:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l536(i32 [[TMP180]], double* [[TMP181]], double* [[TMP182]], double* [[TMP183]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT68]]
-// CHECK11:       omp_offload.cont68:
-// CHECK11-NEXT:    [[TMP212:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP212]], i32* [[CH_CASTED69]], align 4
-// CHECK11-NEXT:    [[TMP213:%.*]] = load i32, i32* [[CH_CASTED69]], align 4
-// CHECK11-NEXT:    [[TMP214:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP214]], i32* [[N_CASTED70]], align 4
-// CHECK11-NEXT:    [[TMP215:%.*]] = load i32, i32* [[N_CASTED70]], align 4
-// CHECK11-NEXT:    [[TMP216:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP217:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP218:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP219:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP220:%.*]] = bitcast i8** [[TMP219]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP213]], i32* [[TMP220]], align 4
-// CHECK11-NEXT:    [[TMP221:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP222:%.*]] = bitcast i8** [[TMP221]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP213]], i32* [[TMP222]], align 4
-// CHECK11-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP223]], align 4
-// CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP173]], i32* [[N_CASTED46]], align 4
+// CHECK11-NEXT:    [[TMP174:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK11-NEXT:    [[TMP175:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP176:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP177:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP179:%.*]] = bitcast i8** [[TMP178]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP172]], i32* [[TMP179]], align 4
+// CHECK11-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP181:%.*]] = bitcast i8** [[TMP180]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP172]], i32* [[TMP181]], align 4
+// CHECK11-NEXT:    [[TMP182:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP182]], align 4
+// CHECK11-NEXT:    [[TMP183:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP184:%.*]] = bitcast i8** [[TMP183]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP174]], i32* [[TMP184]], align 4
+// CHECK11-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP186:%.*]] = bitcast i8** [[TMP185]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP174]], i32* [[TMP186]], align 4
+// CHECK11-NEXT:    [[TMP187:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP187]], align 4
+// CHECK11-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP189:%.*]] = bitcast i8** [[TMP188]] to double**
+// CHECK11-NEXT:    store double* [[TMP175]], double** [[TMP189]], align 4
+// CHECK11-NEXT:    [[TMP190:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP191:%.*]] = bitcast i8** [[TMP190]] to double**
+// CHECK11-NEXT:    store double* [[TMP175]], double** [[TMP191]], align 4
+// CHECK11-NEXT:    [[TMP192:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP192]], align 4
+// CHECK11-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP194:%.*]] = bitcast i8** [[TMP193]] to double**
+// CHECK11-NEXT:    store double* [[TMP176]], double** [[TMP194]], align 4
+// CHECK11-NEXT:    [[TMP195:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP196:%.*]] = bitcast i8** [[TMP195]] to double**
+// CHECK11-NEXT:    store double* [[TMP176]], double** [[TMP196]], align 4
+// CHECK11-NEXT:    [[TMP197:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP197]], align 4
+// CHECK11-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP199:%.*]] = bitcast i8** [[TMP198]] to double**
+// CHECK11-NEXT:    store double* [[TMP177]], double** [[TMP199]], align 4
+// CHECK11-NEXT:    [[TMP200:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP201:%.*]] = bitcast i8** [[TMP200]] to double**
+// CHECK11-NEXT:    store double* [[TMP177]], double** [[TMP201]], align 4
+// CHECK11-NEXT:    [[TMP202:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP202]], align 4
+// CHECK11-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP205:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP205]], i32* [[DOTCAPTURE_EXPR_51]], align 4
+// CHECK11-NEXT:    [[TMP206:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_51]], align 4
+// CHECK11-NEXT:    [[SUB53:%.*]] = sub nsw i32 [[TMP206]], 0
+// CHECK11-NEXT:    [[DIV54:%.*]] = sdiv i32 [[SUB53]], 1
+// CHECK11-NEXT:    [[SUB55:%.*]] = sub nsw i32 [[DIV54]], 1
+// CHECK11-NEXT:    store i32 [[SUB55]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK11-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK11-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP207]], 1
+// CHECK11-NEXT:    [[TMP208:%.*]] = zext i32 [[ADD56]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP208]])
+// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP209:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP209]], align 4
+// CHECK11-NEXT:    [[TMP210:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP210]], align 4
+// CHECK11-NEXT:    [[TMP211:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP203]], i8*** [[TMP211]], align 4
+// CHECK11-NEXT:    [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP204]], i8*** [[TMP212]], align 4
+// CHECK11-NEXT:    [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64** [[TMP213]], align 4
+// CHECK11-NEXT:    [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i64** [[TMP214]], align 4
+// CHECK11-NEXT:    [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP215]], align 4
+// CHECK11-NEXT:    [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP216]], align 4
+// CHECK11-NEXT:    [[TMP217:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l506.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]])
+// CHECK11-NEXT:    [[TMP218:%.*]] = icmp ne i32 [[TMP217]], 0
+// CHECK11-NEXT:    br i1 [[TMP218]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK11:       omp_offload.failed58:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l506(i32 [[TMP172]], i32 [[TMP174]], double* [[TMP175]], double* [[TMP176]], double* [[TMP177]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
+// CHECK11:       omp_offload.cont59:
+// CHECK11-NEXT:    [[TMP219:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP219]], i32* [[N_CASTED60]], align 4
+// CHECK11-NEXT:    [[TMP220:%.*]] = load i32, i32* [[N_CASTED60]], align 4
+// CHECK11-NEXT:    [[TMP221:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP222:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP223:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP225:%.*]] = bitcast i8** [[TMP224]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP215]], i32* [[TMP225]], align 4
-// CHECK11-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP220]], i32* [[TMP225]], align 4
+// CHECK11-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP227:%.*]] = bitcast i8** [[TMP226]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP215]], i32* [[TMP227]], align 4
-// CHECK11-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP220]], i32* [[TMP227]], align 4
+// CHECK11-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP228]], align 4
-// CHECK11-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP230:%.*]] = bitcast i8** [[TMP229]] to double**
-// CHECK11-NEXT:    store double* [[TMP216]], double** [[TMP230]], align 4
-// CHECK11-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 2
+// CHECK11-NEXT:    store double* [[TMP221]], double** [[TMP230]], align 4
+// CHECK11-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP232:%.*]] = bitcast i8** [[TMP231]] to double**
-// CHECK11-NEXT:    store double* [[TMP216]], double** [[TMP232]], align 4
-// CHECK11-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 2
+// CHECK11-NEXT:    store double* [[TMP221]], double** [[TMP232]], align 4
+// CHECK11-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 1
 // CHECK11-NEXT:    store i8* null, i8** [[TMP233]], align 4
-// CHECK11-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP235:%.*]] = bitcast i8** [[TMP234]] to double**
-// CHECK11-NEXT:    store double* [[TMP217]], double** [[TMP235]], align 4
-// CHECK11-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 3
+// CHECK11-NEXT:    store double* [[TMP222]], double** [[TMP235]], align 4
+// CHECK11-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP237:%.*]] = bitcast i8** [[TMP236]] to double**
-// CHECK11-NEXT:    store double* [[TMP217]], double** [[TMP237]], align 4
-// CHECK11-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 3
+// CHECK11-NEXT:    store double* [[TMP222]], double** [[TMP237]], align 4
+// CHECK11-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 2
 // CHECK11-NEXT:    store i8* null, i8** [[TMP238]], align 4
-// CHECK11-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP240:%.*]] = bitcast i8** [[TMP239]] to double**
-// CHECK11-NEXT:    store double* [[TMP218]], double** [[TMP240]], align 4
-// CHECK11-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 4
+// CHECK11-NEXT:    store double* [[TMP223]], double** [[TMP240]], align 4
+// CHECK11-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP242:%.*]] = bitcast i8** [[TMP241]] to double**
-// CHECK11-NEXT:    store double* [[TMP218]], double** [[TMP242]], align 4
-// CHECK11-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 4
+// CHECK11-NEXT:    store double* [[TMP223]], double** [[TMP242]], align 4
+// CHECK11-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 3
 // CHECK11-NEXT:    store i8* null, i8** [[TMP243]], align 4
-// CHECK11-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP246:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_75]], align 4
-// CHECK11-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_75]], align 4
-// CHECK11-NEXT:    [[SUB77:%.*]] = sub nsw i32 [[TMP247]], 0
-// CHECK11-NEXT:    [[DIV78:%.*]] = sdiv i32 [[SUB77]], 1
-// CHECK11-NEXT:    [[SUB79:%.*]] = sub nsw i32 [[DIV78]], 1
-// CHECK11-NEXT:    store i32 [[SUB79]], i32* [[DOTCAPTURE_EXPR_76]], align 4
-// CHECK11-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_76]], align 4
-// CHECK11-NEXT:    [[ADD80:%.*]] = add nsw i32 [[TMP248]], 1
-// CHECK11-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD80]] to i64
+// CHECK11-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_65]], align 4
+// CHECK11-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_65]], align 4
+// CHECK11-NEXT:    [[SUB67:%.*]] = sub nsw i32 [[TMP247]], 0
+// CHECK11-NEXT:    [[DIV68:%.*]] = sdiv i32 [[SUB67]], 1
+// CHECK11-NEXT:    [[SUB69:%.*]] = sub nsw i32 [[DIV68]], 1
+// CHECK11-NEXT:    store i32 [[SUB69]], i32* [[DOTCAPTURE_EXPR_66]], align 4
+// CHECK11-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_66]], align 4
+// CHECK11-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP248]], 1
+// CHECK11-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD70]] to i64
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP249]])
-// CHECK11-NEXT:    [[TMP250:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l562.region_id, i32 5, i8** [[TMP244]], i8** [[TMP245]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP251:%.*]] = icmp ne i32 [[TMP250]], 0
-// CHECK11-NEXT:    br i1 [[TMP251]], label [[OMP_OFFLOAD_FAILED81:%.*]], label [[OMP_OFFLOAD_CONT82:%.*]]
-// CHECK11:       omp_offload.failed81:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l562(i32 [[TMP213]], i32 [[TMP215]], double* [[TMP216]], double* [[TMP217]], double* [[TMP218]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT82]]
-// CHECK11:       omp_offload.cont82:
+// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP250:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP250]], align 4
+// CHECK11-NEXT:    [[TMP251:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP251]], align 4
+// CHECK11-NEXT:    [[TMP252:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP244]], i8*** [[TMP252]], align 4
+// CHECK11-NEXT:    [[TMP253:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP245]], i8*** [[TMP253]], align 4
+// CHECK11-NEXT:    [[TMP254:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.20, i32 0, i32 0), i64** [[TMP254]], align 4
+// CHECK11-NEXT:    [[TMP255:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.21, i32 0, i32 0), i64** [[TMP255]], align 4
+// CHECK11-NEXT:    [[TMP256:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP256]], align 4
+// CHECK11-NEXT:    [[TMP257:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP257]], align 4
+// CHECK11-NEXT:    [[TMP258:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l536.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]])
+// CHECK11-NEXT:    [[TMP259:%.*]] = icmp ne i32 [[TMP258]], 0
+// CHECK11-NEXT:    br i1 [[TMP259]], label [[OMP_OFFLOAD_FAILED72:%.*]], label [[OMP_OFFLOAD_CONT73:%.*]]
+// CHECK11:       omp_offload.failed72:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l536(i32 [[TMP220]], double* [[TMP221]], double* [[TMP222]], double* [[TMP223]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT73]]
+// CHECK11:       omp_offload.cont73:
+// CHECK11-NEXT:    [[TMP260:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP260]], i32* [[CH_CASTED74]], align 4
+// CHECK11-NEXT:    [[TMP261:%.*]] = load i32, i32* [[CH_CASTED74]], align 4
+// CHECK11-NEXT:    [[TMP262:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP262]], i32* [[N_CASTED75]], align 4
+// CHECK11-NEXT:    [[TMP263:%.*]] = load i32, i32* [[N_CASTED75]], align 4
+// CHECK11-NEXT:    [[TMP264:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP265:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP266:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP268:%.*]] = bitcast i8** [[TMP267]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP261]], i32* [[TMP268]], align 4
+// CHECK11-NEXT:    [[TMP269:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP270:%.*]] = bitcast i8** [[TMP269]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP261]], i32* [[TMP270]], align 4
+// CHECK11-NEXT:    [[TMP271:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP271]], align 4
+// CHECK11-NEXT:    [[TMP272:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP273:%.*]] = bitcast i8** [[TMP272]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP263]], i32* [[TMP273]], align 4
+// CHECK11-NEXT:    [[TMP274:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP275:%.*]] = bitcast i8** [[TMP274]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP263]], i32* [[TMP275]], align 4
+// CHECK11-NEXT:    [[TMP276:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP276]], align 4
+// CHECK11-NEXT:    [[TMP277:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP278:%.*]] = bitcast i8** [[TMP277]] to double**
+// CHECK11-NEXT:    store double* [[TMP264]], double** [[TMP278]], align 4
+// CHECK11-NEXT:    [[TMP279:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP280:%.*]] = bitcast i8** [[TMP279]] to double**
+// CHECK11-NEXT:    store double* [[TMP264]], double** [[TMP280]], align 4
+// CHECK11-NEXT:    [[TMP281:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP281]], align 4
+// CHECK11-NEXT:    [[TMP282:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP283:%.*]] = bitcast i8** [[TMP282]] to double**
+// CHECK11-NEXT:    store double* [[TMP265]], double** [[TMP283]], align 4
+// CHECK11-NEXT:    [[TMP284:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP285:%.*]] = bitcast i8** [[TMP284]] to double**
+// CHECK11-NEXT:    store double* [[TMP265]], double** [[TMP285]], align 4
+// CHECK11-NEXT:    [[TMP286:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP286]], align 4
+// CHECK11-NEXT:    [[TMP287:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP288:%.*]] = bitcast i8** [[TMP287]] to double**
+// CHECK11-NEXT:    store double* [[TMP266]], double** [[TMP288]], align 4
+// CHECK11-NEXT:    [[TMP289:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP290:%.*]] = bitcast i8** [[TMP289]] to double**
+// CHECK11-NEXT:    store double* [[TMP266]], double** [[TMP290]], align 4
+// CHECK11-NEXT:    [[TMP291:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP291]], align 4
+// CHECK11-NEXT:    [[TMP292:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP293:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP294:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP294]], i32* [[DOTCAPTURE_EXPR_80]], align 4
+// CHECK11-NEXT:    [[TMP295:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_80]], align 4
+// CHECK11-NEXT:    [[SUB82:%.*]] = sub nsw i32 [[TMP295]], 0
+// CHECK11-NEXT:    [[DIV83:%.*]] = sdiv i32 [[SUB82]], 1
+// CHECK11-NEXT:    [[SUB84:%.*]] = sub nsw i32 [[DIV83]], 1
+// CHECK11-NEXT:    store i32 [[SUB84]], i32* [[DOTCAPTURE_EXPR_81]], align 4
+// CHECK11-NEXT:    [[TMP296:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_81]], align 4
+// CHECK11-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP296]], 1
+// CHECK11-NEXT:    [[TMP297:%.*]] = zext i32 [[ADD85]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP297]])
+// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP298:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP298]], align 4
+// CHECK11-NEXT:    [[TMP299:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP299]], align 4
+// CHECK11-NEXT:    [[TMP300:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP292]], i8*** [[TMP300]], align 4
+// CHECK11-NEXT:    [[TMP301:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP293]], i8*** [[TMP301]], align 4
+// CHECK11-NEXT:    [[TMP302:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.24, i32 0, i32 0), i64** [[TMP302]], align 4
+// CHECK11-NEXT:    [[TMP303:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i64** [[TMP303]], align 4
+// CHECK11-NEXT:    [[TMP304:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP304]], align 4
+// CHECK11-NEXT:    [[TMP305:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP305]], align 4
+// CHECK11-NEXT:    [[TMP306:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l562.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]])
+// CHECK11-NEXT:    [[TMP307:%.*]] = icmp ne i32 [[TMP306]], 0
+// CHECK11-NEXT:    br i1 [[TMP307]], label [[OMP_OFFLOAD_FAILED87:%.*]], label [[OMP_OFFLOAD_CONT88:%.*]]
+// CHECK11:       omp_offload.failed87:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l562(i32 [[TMP261]], i32 [[TMP263]], double* [[TMP264]], double* [[TMP265]], double* [[TMP266]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT88]]
+// CHECK11:       omp_offload.cont88:
 // CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
 // CHECK11-NEXT:    ret i32 [[CALL]]
 //
@@ -10868,43 +11225,43 @@ int main() {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[CH_CASTED:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED16:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS17:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS18:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS19:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP20:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED17:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS19:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS20:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED29:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS30:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS31:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS32:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP33:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_35:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[CH_CASTED42:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED43:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS44:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS45:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS46:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP47:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_48:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED56:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS57:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS58:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS59:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP60:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_61:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_62:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[CH_CASTED69:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED70:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS71:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS72:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS73:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP74:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_75:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_76:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED31:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS34:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[CH_CASTED45:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED46:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS48:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS49:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED60:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS63:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[CH_CASTED74:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED75:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS77:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS78:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 10000, i32* [[N]], align 4
 // CHECK11-NEXT:    store i32 100, i32* [[CH]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N]], align 4
@@ -10958,394 +11315,513 @@ int main() {
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
 // CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42.region_id, i32 4, i8** [[TMP25]], i8** [[TMP26]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP32]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP25]], i8*** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP26]], i8*** [[TMP34]], align 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.28, i32 0, i32 0), i64** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.29, i32 0, i32 0), i64** [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP38]], align 4
+// CHECK11-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
+// CHECK11-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42(i32 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]], i32* [[TMP4]]) #[[ATTR2]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP35]], i32** [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP35]], i32** [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP36]], i32** [[TMP49]], align 4
-// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP36]], i32** [[TMP51]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP45:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[TMP47]], align 4
+// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[TMP49]], align 4
+// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP50]], align 4
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP43]], i32** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP37]], i32** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP37]], i32** [[TMP56]], align 4
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP57]], align 4
-// CHECK11-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP60]], i32* [[DOTCAPTURE_EXPR_8]], align 4
-// CHECK11-NEXT:    [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_8]], align 4
-// CHECK11-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[TMP61]], 0
+// CHECK11-NEXT:    store i32* [[TMP43]], i32** [[TMP54]], align 4
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP55]], align 4
+// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP44]], i32** [[TMP57]], align 4
+// CHECK11-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP44]], i32** [[TMP59]], align 4
+// CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP60]], align 4
+// CHECK11-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP45]], i32** [[TMP62]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP45]], i32** [[TMP64]], align 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP65]], align 4
+// CHECK11-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[DOTCAPTURE_EXPR_8]], align 4
+// CHECK11-NEXT:    [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_8]], align 4
+// CHECK11-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[TMP69]], 0
 // CHECK11-NEXT:    [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
 // CHECK11-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[DIV11]], 1
 // CHECK11-NEXT:    store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP62]], 1
-// CHECK11-NEXT:    [[TMP63:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP63]])
-// CHECK11-NEXT:    [[TMP64:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l51.region_id, i32 4, i8** [[TMP58]], i8** [[TMP59]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP65:%.*]] = icmp ne i32 [[TMP64]], 0
-// CHECK11-NEXT:    br i1 [[TMP65]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
-// CHECK11:       omp_offload.failed14:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l51(i32 [[TMP34]], i32* [[TMP35]], i32* [[TMP36]], i32* [[TMP37]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
-// CHECK11:       omp_offload.cont15:
-// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[CH_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = load i32, i32* [[CH_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[N_CASTED16]], align 4
-// CHECK11-NEXT:    [[TMP69:%.*]] = load i32, i32* [[N_CASTED16]], align 4
-// CHECK11-NEXT:    [[TMP70:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP71:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP72:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP74]], align 4
-// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP76]], align 4
-// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP77]], align 4
-// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP69]], i32* [[TMP79]], align 4
-// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP69]], i32* [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP70]], i32** [[TMP84]], align 4
-// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP70]], i32** [[TMP86]], align 4
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP71]], i32** [[TMP89]], align 4
-// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP71]], i32** [[TMP91]], align 4
-// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP92]], align 4
-// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP72]], i32** [[TMP94]], align 4
-// CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP72]], i32** [[TMP96]], align 4
-// CHECK11-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP97]], align 4
-// CHECK11-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP100]], i32* [[DOTCAPTURE_EXPR_21]], align 4
-// CHECK11-NEXT:    [[TMP101:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
-// CHECK11-NEXT:    [[SUB23:%.*]] = sub nsw i32 [[TMP101]], 0
-// CHECK11-NEXT:    [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1
-// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[DIV24]], 1
-// CHECK11-NEXT:    store i32 [[SUB25]], i32* [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK11-NEXT:    [[TMP102:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK11-NEXT:    [[ADD26:%.*]] = add nsw i32 [[TMP102]], 1
-// CHECK11-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD26]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP103]])
-// CHECK11-NEXT:    [[TMP104:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l59.region_id, i32 5, i8** [[TMP98]], i8** [[TMP99]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP105:%.*]] = icmp ne i32 [[TMP104]], 0
-// CHECK11-NEXT:    br i1 [[TMP105]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
-// CHECK11:       omp_offload.failed27:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l59(i32 [[TMP67]], i32 [[TMP69]], i32* [[TMP70]], i32* [[TMP71]], i32* [[TMP72]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT28]]
-// CHECK11:       omp_offload.cont28:
-// CHECK11-NEXT:    [[TMP106:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP106]], i32* [[N_CASTED29]], align 4
-// CHECK11-NEXT:    [[TMP107:%.*]] = load i32, i32* [[N_CASTED29]], align 4
-// CHECK11-NEXT:    [[TMP108:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP109:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP110:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP107]], i32* [[TMP112]], align 4
-// CHECK11-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP107]], i32* [[TMP114]], align 4
-// CHECK11-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP115]], align 4
-// CHECK11-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP108]], i32** [[TMP117]], align 4
-// CHECK11-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP108]], i32** [[TMP119]], align 4
-// CHECK11-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP120]], align 4
-// CHECK11-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP109]], i32** [[TMP122]], align 4
-// CHECK11-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP109]], i32** [[TMP124]], align 4
-// CHECK11-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK11-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP110]], i32** [[TMP127]], align 4
-// CHECK11-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP110]], i32** [[TMP129]], align 4
-// CHECK11-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK11-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR_34]], align 4
-// CHECK11-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
-// CHECK11-NEXT:    [[SUB36:%.*]] = sub nsw i32 [[TMP134]], 0
-// CHECK11-NEXT:    [[DIV37:%.*]] = sdiv i32 [[SUB36]], 1
-// CHECK11-NEXT:    [[SUB38:%.*]] = sub nsw i32 [[DIV37]], 1
-// CHECK11-NEXT:    store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_35]], align 4
-// CHECK11-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_35]], align 4
-// CHECK11-NEXT:    [[ADD39:%.*]] = add nsw i32 [[TMP135]], 1
-// CHECK11-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD39]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP136]])
-// CHECK11-NEXT:    [[TMP137:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l67.region_id, i32 4, i8** [[TMP131]], i8** [[TMP132]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.40, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.41, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP138:%.*]] = icmp ne i32 [[TMP137]], 0
-// CHECK11-NEXT:    br i1 [[TMP138]], label [[OMP_OFFLOAD_FAILED40:%.*]], label [[OMP_OFFLOAD_CONT41:%.*]]
-// CHECK11:       omp_offload.failed40:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l67(i32 [[TMP107]], i32* [[TMP108]], i32* [[TMP109]], i32* [[TMP110]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT41]]
-// CHECK11:       omp_offload.cont41:
-// CHECK11-NEXT:    [[TMP139:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP139]], i32* [[CH_CASTED42]], align 4
-// CHECK11-NEXT:    [[TMP140:%.*]] = load i32, i32* [[CH_CASTED42]], align 4
-// CHECK11-NEXT:    [[TMP141:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP141]], i32* [[N_CASTED43]], align 4
-// CHECK11-NEXT:    [[TMP142:%.*]] = load i32, i32* [[N_CASTED43]], align 4
-// CHECK11-NEXT:    [[TMP143:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP144:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP145:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP140]], i32* [[TMP147]], align 4
-// CHECK11-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP140]], i32* [[TMP149]], align 4
-// CHECK11-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP150]], align 4
-// CHECK11-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP142]], i32* [[TMP152]], align 4
-// CHECK11-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP142]], i32* [[TMP154]], align 4
-// CHECK11-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP155]], align 4
-// CHECK11-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP143]], i32** [[TMP157]], align 4
-// CHECK11-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP143]], i32** [[TMP159]], align 4
-// CHECK11-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP160]], align 4
-// CHECK11-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP144]], i32** [[TMP162]], align 4
-// CHECK11-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP144]], i32** [[TMP164]], align 4
-// CHECK11-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK11-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP167:%.*]] = bitcast i8** [[TMP166]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP145]], i32** [[TMP167]], align 4
-// CHECK11-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP169:%.*]] = bitcast i8** [[TMP168]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP145]], i32** [[TMP169]], align 4
-// CHECK11-NEXT:    [[TMP170:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP170]], align 4
-// CHECK11-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP70]], 1
+// CHECK11-NEXT:    [[TMP71:%.*]] = zext i32 [[ADD13]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP71]])
+// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP72]], align 4
+// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP66]], i8*** [[TMP74]], align 4
+// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP67]], i8*** [[TMP75]], align 4
+// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.32, i32 0, i32 0), i64** [[TMP76]], align 4
+// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.33, i32 0, i32 0), i64** [[TMP77]], align 4
+// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP78]], align 4
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP79]], align 4
+// CHECK11-NEXT:    [[TMP80:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l51.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]])
+// CHECK11-NEXT:    [[TMP81:%.*]] = icmp ne i32 [[TMP80]], 0
+// CHECK11-NEXT:    br i1 [[TMP81]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11:       omp_offload.failed15:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l51(i32 [[TMP42]], i32* [[TMP43]], i32* [[TMP44]], i32* [[TMP45]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
+// CHECK11:       omp_offload.cont16:
+// CHECK11-NEXT:    [[TMP82:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP82]], i32* [[CH_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP83:%.*]] = load i32, i32* [[CH_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP84]], i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP85:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP86:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP87:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP88:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP83]], i32* [[TMP90]], align 4
+// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP83]], i32* [[TMP92]], align 4
+// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP93]], align 4
+// CHECK11-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP85]], i32* [[TMP95]], align 4
+// CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP85]], i32* [[TMP97]], align 4
+// CHECK11-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP98]], align 4
+// CHECK11-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP86]], i32** [[TMP100]], align 4
+// CHECK11-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP86]], i32** [[TMP102]], align 4
+// CHECK11-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP103]], align 4
+// CHECK11-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP87]], i32** [[TMP105]], align 4
+// CHECK11-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP87]], i32** [[TMP107]], align 4
+// CHECK11-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK11-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP88]], i32** [[TMP110]], align 4
+// CHECK11-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP88]], i32** [[TMP112]], align 4
+// CHECK11-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK11-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP116:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP116]], i32* [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK11-NEXT:    [[TMP117:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK11-NEXT:    [[SUB24:%.*]] = sub nsw i32 [[TMP117]], 0
+// CHECK11-NEXT:    [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
+// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[DIV25]], 1
+// CHECK11-NEXT:    store i32 [[SUB26]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[TMP118:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP118]], 1
+// CHECK11-NEXT:    [[TMP119:%.*]] = zext i32 [[ADD27]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP119]])
+// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP120]], align 4
+// CHECK11-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP121]], align 4
+// CHECK11-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP114]], i8*** [[TMP122]], align 4
+// CHECK11-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP115]], i8*** [[TMP123]], align 4
+// CHECK11-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.36, i32 0, i32 0), i64** [[TMP124]], align 4
+// CHECK11-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.37, i32 0, i32 0), i64** [[TMP125]], align 4
+// CHECK11-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP126]], align 4
+// CHECK11-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP127]], align 4
+// CHECK11-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l59.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]])
+// CHECK11-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK11-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK11:       omp_offload.failed29:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l59(i32 [[TMP83]], i32 [[TMP85]], i32* [[TMP86]], i32* [[TMP87]], i32* [[TMP88]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
+// CHECK11:       omp_offload.cont30:
+// CHECK11-NEXT:    [[TMP130:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP130]], i32* [[N_CASTED31]], align 4
+// CHECK11-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK11-NEXT:    [[TMP132:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP133:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP134:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP131]], i32* [[TMP136]], align 4
+// CHECK11-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP131]], i32* [[TMP138]], align 4
+// CHECK11-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP139]], align 4
+// CHECK11-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP132]], i32** [[TMP141]], align 4
+// CHECK11-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP132]], i32** [[TMP143]], align 4
+// CHECK11-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP144]], align 4
+// CHECK11-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP133]], i32** [[TMP146]], align 4
+// CHECK11-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP133]], i32** [[TMP148]], align 4
+// CHECK11-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP149]], align 4
+// CHECK11-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP134]], i32** [[TMP151]], align 4
+// CHECK11-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP134]], i32** [[TMP153]], align 4
+// CHECK11-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP154]], align 4
+// CHECK11-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP157:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP157]], i32* [[DOTCAPTURE_EXPR_36]], align 4
+// CHECK11-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_36]], align 4
+// CHECK11-NEXT:    [[SUB38:%.*]] = sub nsw i32 [[TMP158]], 0
+// CHECK11-NEXT:    [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1
+// CHECK11-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[DIV39]], 1
+// CHECK11-NEXT:    store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK11-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK11-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP159]], 1
+// CHECK11-NEXT:    [[TMP160:%.*]] = zext i32 [[ADD41]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP160]])
+// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP161]], align 4
+// CHECK11-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP162]], align 4
+// CHECK11-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP155]], i8*** [[TMP163]], align 4
+// CHECK11-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP156]], i8*** [[TMP164]], align 4
+// CHECK11-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.40, i32 0, i32 0), i64** [[TMP165]], align 4
+// CHECK11-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.41, i32 0, i32 0), i64** [[TMP166]], align 4
+// CHECK11-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP167]], align 4
+// CHECK11-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP168]], align 4
+// CHECK11-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l67.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]])
+// CHECK11-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
+// CHECK11-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK11:       omp_offload.failed43:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l67(i32 [[TMP131]], i32* [[TMP132]], i32* [[TMP133]], i32* [[TMP134]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
+// CHECK11:       omp_offload.cont44:
+// CHECK11-NEXT:    [[TMP171:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP171]], i32* [[CH_CASTED45]], align 4
+// CHECK11-NEXT:    [[TMP172:%.*]] = load i32, i32* [[CH_CASTED45]], align 4
 // CHECK11-NEXT:    [[TMP173:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP173]], i32* [[DOTCAPTURE_EXPR_48]], align 4
-// CHECK11-NEXT:    [[TMP174:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_48]], align 4
-// CHECK11-NEXT:    [[SUB50:%.*]] = sub nsw i32 [[TMP174]], 0
-// CHECK11-NEXT:    [[DIV51:%.*]] = sdiv i32 [[SUB50]], 1
-// CHECK11-NEXT:    [[SUB52:%.*]] = sub nsw i32 [[DIV51]], 1
-// CHECK11-NEXT:    store i32 [[SUB52]], i32* [[DOTCAPTURE_EXPR_49]], align 4
-// CHECK11-NEXT:    [[TMP175:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_49]], align 4
-// CHECK11-NEXT:    [[ADD53:%.*]] = add nsw i32 [[TMP175]], 1
-// CHECK11-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD53]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP176]])
-// CHECK11-NEXT:    [[TMP177:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, i32 5, i8** [[TMP171]], i8** [[TMP172]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.44, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.45, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP178:%.*]] = icmp ne i32 [[TMP177]], 0
-// CHECK11-NEXT:    br i1 [[TMP178]], label [[OMP_OFFLOAD_FAILED54:%.*]], label [[OMP_OFFLOAD_CONT55:%.*]]
-// CHECK11:       omp_offload.failed54:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i32 [[TMP140]], i32 [[TMP142]], i32* [[TMP143]], i32* [[TMP144]], i32* [[TMP145]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT55]]
-// CHECK11:       omp_offload.cont55:
-// CHECK11-NEXT:    [[TMP179:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP179]], i32* [[N_CASTED56]], align 4
-// CHECK11-NEXT:    [[TMP180:%.*]] = load i32, i32* [[N_CASTED56]], align 4
-// CHECK11-NEXT:    [[TMP181:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP182:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP183:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP184:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP185:%.*]] = bitcast i8** [[TMP184]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP180]], i32* [[TMP185]], align 4
-// CHECK11-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP187:%.*]] = bitcast i8** [[TMP186]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP180]], i32* [[TMP187]], align 4
-// CHECK11-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP188]], align 4
-// CHECK11-NEXT:    [[TMP189:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP190:%.*]] = bitcast i8** [[TMP189]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP181]], i32** [[TMP190]], align 4
-// CHECK11-NEXT:    [[TMP191:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP192:%.*]] = bitcast i8** [[TMP191]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP181]], i32** [[TMP192]], align 4
-// CHECK11-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP193]], align 4
-// CHECK11-NEXT:    [[TMP194:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP195:%.*]] = bitcast i8** [[TMP194]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP182]], i32** [[TMP195]], align 4
-// CHECK11-NEXT:    [[TMP196:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP197:%.*]] = bitcast i8** [[TMP196]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP182]], i32** [[TMP197]], align 4
-// CHECK11-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP198]], align 4
-// CHECK11-NEXT:    [[TMP199:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP200:%.*]] = bitcast i8** [[TMP199]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP183]], i32** [[TMP200]], align 4
-// CHECK11-NEXT:    [[TMP201:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP202:%.*]] = bitcast i8** [[TMP201]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP183]], i32** [[TMP202]], align 4
-// CHECK11-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP203]], align 4
-// CHECK11-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP205:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP206:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP206]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK11-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK11-NEXT:    [[SUB63:%.*]] = sub nsw i32 [[TMP207]], 0
-// CHECK11-NEXT:    [[DIV64:%.*]] = sdiv i32 [[SUB63]], 1
-// CHECK11-NEXT:    [[SUB65:%.*]] = sub nsw i32 [[DIV64]], 1
-// CHECK11-NEXT:    store i32 [[SUB65]], i32* [[DOTCAPTURE_EXPR_62]], align 4
-// CHECK11-NEXT:    [[TMP208:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_62]], align 4
-// CHECK11-NEXT:    [[ADD66:%.*]] = add nsw i32 [[TMP208]], 1
-// CHECK11-NEXT:    [[TMP209:%.*]] = zext i32 [[ADD66]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP209]])
-// CHECK11-NEXT:    [[TMP210:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l83.region_id, i32 4, i8** [[TMP204]], i8** [[TMP205]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.48, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.49, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP211:%.*]] = icmp ne i32 [[TMP210]], 0
-// CHECK11-NEXT:    br i1 [[TMP211]], label [[OMP_OFFLOAD_FAILED67:%.*]], label [[OMP_OFFLOAD_CONT68:%.*]]
-// CHECK11:       omp_offload.failed67:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l83(i32 [[TMP180]], i32* [[TMP181]], i32* [[TMP182]], i32* [[TMP183]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT68]]
-// CHECK11:       omp_offload.cont68:
-// CHECK11-NEXT:    [[TMP212:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP212]], i32* [[CH_CASTED69]], align 4
-// CHECK11-NEXT:    [[TMP213:%.*]] = load i32, i32* [[CH_CASTED69]], align 4
-// CHECK11-NEXT:    [[TMP214:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP214]], i32* [[N_CASTED70]], align 4
-// CHECK11-NEXT:    [[TMP215:%.*]] = load i32, i32* [[N_CASTED70]], align 4
-// CHECK11-NEXT:    [[TMP216:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP217:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP218:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP219:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP220:%.*]] = bitcast i8** [[TMP219]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP213]], i32* [[TMP220]], align 4
-// CHECK11-NEXT:    [[TMP221:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP222:%.*]] = bitcast i8** [[TMP221]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP213]], i32* [[TMP222]], align 4
-// CHECK11-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP223]], align 4
-// CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP173]], i32* [[N_CASTED46]], align 4
+// CHECK11-NEXT:    [[TMP174:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK11-NEXT:    [[TMP175:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP176:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP177:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP179:%.*]] = bitcast i8** [[TMP178]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP172]], i32* [[TMP179]], align 4
+// CHECK11-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP181:%.*]] = bitcast i8** [[TMP180]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP172]], i32* [[TMP181]], align 4
+// CHECK11-NEXT:    [[TMP182:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP182]], align 4
+// CHECK11-NEXT:    [[TMP183:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP184:%.*]] = bitcast i8** [[TMP183]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP174]], i32* [[TMP184]], align 4
+// CHECK11-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP186:%.*]] = bitcast i8** [[TMP185]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP174]], i32* [[TMP186]], align 4
+// CHECK11-NEXT:    [[TMP187:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP187]], align 4
+// CHECK11-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP189:%.*]] = bitcast i8** [[TMP188]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP175]], i32** [[TMP189]], align 4
+// CHECK11-NEXT:    [[TMP190:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP191:%.*]] = bitcast i8** [[TMP190]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP175]], i32** [[TMP191]], align 4
+// CHECK11-NEXT:    [[TMP192:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP192]], align 4
+// CHECK11-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP194:%.*]] = bitcast i8** [[TMP193]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP176]], i32** [[TMP194]], align 4
+// CHECK11-NEXT:    [[TMP195:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP196:%.*]] = bitcast i8** [[TMP195]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP176]], i32** [[TMP196]], align 4
+// CHECK11-NEXT:    [[TMP197:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP197]], align 4
+// CHECK11-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP199:%.*]] = bitcast i8** [[TMP198]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP177]], i32** [[TMP199]], align 4
+// CHECK11-NEXT:    [[TMP200:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP201:%.*]] = bitcast i8** [[TMP200]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP177]], i32** [[TMP201]], align 4
+// CHECK11-NEXT:    [[TMP202:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP202]], align 4
+// CHECK11-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP205:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP205]], i32* [[DOTCAPTURE_EXPR_51]], align 4
+// CHECK11-NEXT:    [[TMP206:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_51]], align 4
+// CHECK11-NEXT:    [[SUB53:%.*]] = sub nsw i32 [[TMP206]], 0
+// CHECK11-NEXT:    [[DIV54:%.*]] = sdiv i32 [[SUB53]], 1
+// CHECK11-NEXT:    [[SUB55:%.*]] = sub nsw i32 [[DIV54]], 1
+// CHECK11-NEXT:    store i32 [[SUB55]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK11-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK11-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP207]], 1
+// CHECK11-NEXT:    [[TMP208:%.*]] = zext i32 [[ADD56]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP208]])
+// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP209:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP209]], align 4
+// CHECK11-NEXT:    [[TMP210:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP210]], align 4
+// CHECK11-NEXT:    [[TMP211:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP203]], i8*** [[TMP211]], align 4
+// CHECK11-NEXT:    [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP204]], i8*** [[TMP212]], align 4
+// CHECK11-NEXT:    [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.44, i32 0, i32 0), i64** [[TMP213]], align 4
+// CHECK11-NEXT:    [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.45, i32 0, i32 0), i64** [[TMP214]], align 4
+// CHECK11-NEXT:    [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP215]], align 4
+// CHECK11-NEXT:    [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP216]], align 4
+// CHECK11-NEXT:    [[TMP217:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]])
+// CHECK11-NEXT:    [[TMP218:%.*]] = icmp ne i32 [[TMP217]], 0
+// CHECK11-NEXT:    br i1 [[TMP218]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK11:       omp_offload.failed58:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i32 [[TMP172]], i32 [[TMP174]], i32* [[TMP175]], i32* [[TMP176]], i32* [[TMP177]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
+// CHECK11:       omp_offload.cont59:
+// CHECK11-NEXT:    [[TMP219:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP219]], i32* [[N_CASTED60]], align 4
+// CHECK11-NEXT:    [[TMP220:%.*]] = load i32, i32* [[N_CASTED60]], align 4
+// CHECK11-NEXT:    [[TMP221:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP222:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP223:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP225:%.*]] = bitcast i8** [[TMP224]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP215]], i32* [[TMP225]], align 4
-// CHECK11-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP220]], i32* [[TMP225]], align 4
+// CHECK11-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP227:%.*]] = bitcast i8** [[TMP226]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP215]], i32* [[TMP227]], align 4
-// CHECK11-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP220]], i32* [[TMP227]], align 4
+// CHECK11-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP228]], align 4
-// CHECK11-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP230:%.*]] = bitcast i8** [[TMP229]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP216]], i32** [[TMP230]], align 4
-// CHECK11-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 2
+// CHECK11-NEXT:    store i32* [[TMP221]], i32** [[TMP230]], align 4
+// CHECK11-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP232:%.*]] = bitcast i8** [[TMP231]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP216]], i32** [[TMP232]], align 4
-// CHECK11-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 2
+// CHECK11-NEXT:    store i32* [[TMP221]], i32** [[TMP232]], align 4
+// CHECK11-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 1
 // CHECK11-NEXT:    store i8* null, i8** [[TMP233]], align 4
-// CHECK11-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP235:%.*]] = bitcast i8** [[TMP234]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP217]], i32** [[TMP235]], align 4
-// CHECK11-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 3
+// CHECK11-NEXT:    store i32* [[TMP222]], i32** [[TMP235]], align 4
+// CHECK11-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP237:%.*]] = bitcast i8** [[TMP236]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP217]], i32** [[TMP237]], align 4
-// CHECK11-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 3
+// CHECK11-NEXT:    store i32* [[TMP222]], i32** [[TMP237]], align 4
+// CHECK11-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 2
 // CHECK11-NEXT:    store i8* null, i8** [[TMP238]], align 4
-// CHECK11-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP240:%.*]] = bitcast i8** [[TMP239]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP218]], i32** [[TMP240]], align 4
-// CHECK11-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 4
+// CHECK11-NEXT:    store i32* [[TMP223]], i32** [[TMP240]], align 4
+// CHECK11-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP242:%.*]] = bitcast i8** [[TMP241]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP218]], i32** [[TMP242]], align 4
-// CHECK11-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 4
+// CHECK11-NEXT:    store i32* [[TMP223]], i32** [[TMP242]], align 4
+// CHECK11-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 3
 // CHECK11-NEXT:    store i8* null, i8** [[TMP243]], align 4
-// CHECK11-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP246:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_75]], align 4
-// CHECK11-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_75]], align 4
-// CHECK11-NEXT:    [[SUB77:%.*]] = sub nsw i32 [[TMP247]], 0
-// CHECK11-NEXT:    [[DIV78:%.*]] = sdiv i32 [[SUB77]], 1
-// CHECK11-NEXT:    [[SUB79:%.*]] = sub nsw i32 [[DIV78]], 1
-// CHECK11-NEXT:    store i32 [[SUB79]], i32* [[DOTCAPTURE_EXPR_76]], align 4
-// CHECK11-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_76]], align 4
-// CHECK11-NEXT:    [[ADD80:%.*]] = add nsw i32 [[TMP248]], 1
-// CHECK11-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD80]] to i64
+// CHECK11-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_65]], align 4
+// CHECK11-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_65]], align 4
+// CHECK11-NEXT:    [[SUB67:%.*]] = sub nsw i32 [[TMP247]], 0
+// CHECK11-NEXT:    [[DIV68:%.*]] = sdiv i32 [[SUB67]], 1
+// CHECK11-NEXT:    [[SUB69:%.*]] = sub nsw i32 [[DIV68]], 1
+// CHECK11-NEXT:    store i32 [[SUB69]], i32* [[DOTCAPTURE_EXPR_66]], align 4
+// CHECK11-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_66]], align 4
+// CHECK11-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP248]], 1
+// CHECK11-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD70]] to i64
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP249]])
-// CHECK11-NEXT:    [[TMP250:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l91.region_id, i32 5, i8** [[TMP244]], i8** [[TMP245]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.52, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.53, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP251:%.*]] = icmp ne i32 [[TMP250]], 0
-// CHECK11-NEXT:    br i1 [[TMP251]], label [[OMP_OFFLOAD_FAILED81:%.*]], label [[OMP_OFFLOAD_CONT82:%.*]]
-// CHECK11:       omp_offload.failed81:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l91(i32 [[TMP213]], i32 [[TMP215]], i32* [[TMP216]], i32* [[TMP217]], i32* [[TMP218]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT82]]
-// CHECK11:       omp_offload.cont82:
+// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP250:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP250]], align 4
+// CHECK11-NEXT:    [[TMP251:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP251]], align 4
+// CHECK11-NEXT:    [[TMP252:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP244]], i8*** [[TMP252]], align 4
+// CHECK11-NEXT:    [[TMP253:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP245]], i8*** [[TMP253]], align 4
+// CHECK11-NEXT:    [[TMP254:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.48, i32 0, i32 0), i64** [[TMP254]], align 4
+// CHECK11-NEXT:    [[TMP255:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.49, i32 0, i32 0), i64** [[TMP255]], align 4
+// CHECK11-NEXT:    [[TMP256:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP256]], align 4
+// CHECK11-NEXT:    [[TMP257:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP257]], align 4
+// CHECK11-NEXT:    [[TMP258:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l83.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]])
+// CHECK11-NEXT:    [[TMP259:%.*]] = icmp ne i32 [[TMP258]], 0
+// CHECK11-NEXT:    br i1 [[TMP259]], label [[OMP_OFFLOAD_FAILED72:%.*]], label [[OMP_OFFLOAD_CONT73:%.*]]
+// CHECK11:       omp_offload.failed72:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l83(i32 [[TMP220]], i32* [[TMP221]], i32* [[TMP222]], i32* [[TMP223]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT73]]
+// CHECK11:       omp_offload.cont73:
+// CHECK11-NEXT:    [[TMP260:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP260]], i32* [[CH_CASTED74]], align 4
+// CHECK11-NEXT:    [[TMP261:%.*]] = load i32, i32* [[CH_CASTED74]], align 4
+// CHECK11-NEXT:    [[TMP262:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP262]], i32* [[N_CASTED75]], align 4
+// CHECK11-NEXT:    [[TMP263:%.*]] = load i32, i32* [[N_CASTED75]], align 4
+// CHECK11-NEXT:    [[TMP264:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP265:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP266:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP268:%.*]] = bitcast i8** [[TMP267]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP261]], i32* [[TMP268]], align 4
+// CHECK11-NEXT:    [[TMP269:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP270:%.*]] = bitcast i8** [[TMP269]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP261]], i32* [[TMP270]], align 4
+// CHECK11-NEXT:    [[TMP271:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP271]], align 4
+// CHECK11-NEXT:    [[TMP272:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP273:%.*]] = bitcast i8** [[TMP272]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP263]], i32* [[TMP273]], align 4
+// CHECK11-NEXT:    [[TMP274:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP275:%.*]] = bitcast i8** [[TMP274]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP263]], i32* [[TMP275]], align 4
+// CHECK11-NEXT:    [[TMP276:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP276]], align 4
+// CHECK11-NEXT:    [[TMP277:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP278:%.*]] = bitcast i8** [[TMP277]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP264]], i32** [[TMP278]], align 4
+// CHECK11-NEXT:    [[TMP279:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP280:%.*]] = bitcast i8** [[TMP279]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP264]], i32** [[TMP280]], align 4
+// CHECK11-NEXT:    [[TMP281:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP281]], align 4
+// CHECK11-NEXT:    [[TMP282:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP283:%.*]] = bitcast i8** [[TMP282]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP265]], i32** [[TMP283]], align 4
+// CHECK11-NEXT:    [[TMP284:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP285:%.*]] = bitcast i8** [[TMP284]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP265]], i32** [[TMP285]], align 4
+// CHECK11-NEXT:    [[TMP286:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP286]], align 4
+// CHECK11-NEXT:    [[TMP287:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP288:%.*]] = bitcast i8** [[TMP287]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP266]], i32** [[TMP288]], align 4
+// CHECK11-NEXT:    [[TMP289:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP290:%.*]] = bitcast i8** [[TMP289]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP266]], i32** [[TMP290]], align 4
+// CHECK11-NEXT:    [[TMP291:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP291]], align 4
+// CHECK11-NEXT:    [[TMP292:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP293:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP294:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP294]], i32* [[DOTCAPTURE_EXPR_80]], align 4
+// CHECK11-NEXT:    [[TMP295:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_80]], align 4
+// CHECK11-NEXT:    [[SUB82:%.*]] = sub nsw i32 [[TMP295]], 0
+// CHECK11-NEXT:    [[DIV83:%.*]] = sdiv i32 [[SUB82]], 1
+// CHECK11-NEXT:    [[SUB84:%.*]] = sub nsw i32 [[DIV83]], 1
+// CHECK11-NEXT:    store i32 [[SUB84]], i32* [[DOTCAPTURE_EXPR_81]], align 4
+// CHECK11-NEXT:    [[TMP296:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_81]], align 4
+// CHECK11-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP296]], 1
+// CHECK11-NEXT:    [[TMP297:%.*]] = zext i32 [[ADD85]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP297]])
+// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP298:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP298]], align 4
+// CHECK11-NEXT:    [[TMP299:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP299]], align 4
+// CHECK11-NEXT:    [[TMP300:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP292]], i8*** [[TMP300]], align 4
+// CHECK11-NEXT:    [[TMP301:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP293]], i8*** [[TMP301]], align 4
+// CHECK11-NEXT:    [[TMP302:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.52, i32 0, i32 0), i64** [[TMP302]], align 4
+// CHECK11-NEXT:    [[TMP303:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.53, i32 0, i32 0), i64** [[TMP303]], align 4
+// CHECK11-NEXT:    [[TMP304:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP304]], align 4
+// CHECK11-NEXT:    [[TMP305:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP305]], align 4
+// CHECK11-NEXT:    [[TMP306:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l91.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]])
+// CHECK11-NEXT:    [[TMP307:%.*]] = icmp ne i32 [[TMP306]], 0
+// CHECK11-NEXT:    br i1 [[TMP307]], label [[OMP_OFFLOAD_FAILED87:%.*]], label [[OMP_OFFLOAD_CONT88:%.*]]
+// CHECK11:       omp_offload.failed87:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l91(i32 [[TMP261]], i32 [[TMP263]], i32* [[TMP264]], i32* [[TMP265]], i32* [[TMP266]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT88]]
+// CHECK11:       omp_offload.cont88:
 // CHECK11-NEXT:    ret i32 0
 //
 //

diff  --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
index bf9ab6077eabc..508dbb6a68de6 100644
--- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
@@ -868,9 +868,26 @@ int main() {
 // CHECK8-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK8-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
-// CHECK8-NEXT:    [[TMP36:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK8-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
-// CHECK8-NEXT:    br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK8-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK8-NEXT:    store i32 1, i32* [[TMP36]], align 4
+// CHECK8-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK8-NEXT:    store i32 5, i32* [[TMP37]], align 4
+// CHECK8-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK8-NEXT:    store i8** [[TMP34]], i8*** [[TMP38]], align 8
+// CHECK8-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK8-NEXT:    store i8** [[TMP35]], i8*** [[TMP39]], align 8
+// CHECK8-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK8-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP40]], align 8
+// CHECK8-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK8-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP41]], align 8
+// CHECK8-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK8-NEXT:    store i8** null, i8*** [[TMP42]], align 8
+// CHECK8-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK8-NEXT:    store i8** null, i8*** [[TMP43]], align 8
+// CHECK8-NEXT:    [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK8-NEXT:    [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK8-NEXT:    br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK8:       omp_offload.failed:
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP4]], i64 [[TMP6]]) #[[ATTR4:[0-9]+]]
 // CHECK8-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -878,18 +895,18 @@ int main() {
 // CHECK8-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
 // CHECK8-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK8-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
+// CHECK8-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
 // CHECK8-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK8:       arraydestroy.body:
-// CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK8-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK8-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK8-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK8:       arraydestroy.done3:
 // CHECK8-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK8-NEXT:    [[TMP39:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK8-NEXT:    ret i32 [[TMP39]]
+// CHECK8-NEXT:    [[TMP47:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK8-NEXT:    ret i32 [[TMP47]]
 //
 //
 // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -1300,27 +1317,44 @@ int main() {
 // CHECK8-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK8-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
-// CHECK8-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK8-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK8-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK8-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK8-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK8-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK8-NEXT:    store i32 4, i32* [[TMP30]], align 4
+// CHECK8-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK8-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 8
+// CHECK8-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK8-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
+// CHECK8-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK8-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64** [[TMP33]], align 8
+// CHECK8-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK8-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i64** [[TMP34]], align 8
+// CHECK8-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK8-NEXT:    store i8** null, i8*** [[TMP35]], align 8
+// CHECK8-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK8-NEXT:    store i8** null, i8*** [[TMP36]], align 8
+// CHECK8-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK8-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK8-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK8:       omp_offload.failed:
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]]
 // CHECK8-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK8:       omp_offload.cont:
 // CHECK8-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK8-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK8-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
 // CHECK8-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK8:       arraydestroy.body:
-// CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK8-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK8-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK8-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK8:       arraydestroy.done2:
 // CHECK8-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK8-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK8-NEXT:    ret i32 [[TMP32]]
+// CHECK8-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK8-NEXT:    ret i32 [[TMP40]]
 //
 //
 // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
@@ -1804,9 +1838,26 @@ int main() {
 // CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
-// CHECK10-NEXT:    [[TMP36:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
-// CHECK10-NEXT:    br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK10-NEXT:    store i32 1, i32* [[TMP36]], align 4
+// CHECK10-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 5, i32* [[TMP37]], align 4
+// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK10-NEXT:    store i8** [[TMP34]], i8*** [[TMP38]], align 4
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK10-NEXT:    store i8** [[TMP35]], i8*** [[TMP39]], align 4
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK10-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP40]], align 4
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK10-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP41]], align 4
+// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK10-NEXT:    store i8** null, i8*** [[TMP42]], align 4
+// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK10-NEXT:    store i8** null, i8*** [[TMP43]], align 4
+// CHECK10-NEXT:    [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK10-NEXT:    [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK10-NEXT:    br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP4]], i32 [[TMP6]]) #[[ATTR4:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1814,18 +1865,18 @@ int main() {
 // CHECK10-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK10-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
+// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
 // CHECK10-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK10:       arraydestroy.body:
-// CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK10-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK10-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK10-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK10:       arraydestroy.done2:
 // CHECK10-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK10-NEXT:    [[TMP39:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP39]]
+// CHECK10-NEXT:    [[TMP47:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP47]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -2223,27 +2274,44 @@ int main() {
 // CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
-// CHECK10-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK10-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK10-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 4, i32* [[TMP30]], align 4
+// CHECK10-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK10-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK10-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK10-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64** [[TMP33]], align 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK10-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i64** [[TMP34]], align 4
+// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK10-NEXT:    store i8** null, i8*** [[TMP35]], align 4
+// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK10-NEXT:    store i8** null, i8*** [[TMP36]], align 4
+// CHECK10-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK10-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK10-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
 // CHECK10-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK10-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
 // CHECK10-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK10:       arraydestroy.body:
-// CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK10-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK10-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK10-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK10:       arraydestroy.done2:
 // CHECK10-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP32]]
+// CHECK10-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP40]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev

diff  --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
index d67036ebe104c..3718fc9acd42a 100644
--- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
@@ -126,21 +126,55 @@ int main() {
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK1-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l52.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK1:       omp_offload.failed2:
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l52.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK1-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK1:       omp_offload.failed3:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l52() #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK1:       omp_offload.cont3:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK1:       omp_offload.cont4:
 // CHECK1-NEXT:    ret void
 //
 //
@@ -434,48 +468,99 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
-// CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK1-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK1:       omp_offload.failed2:
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK1-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK1:       omp_offload.failed3:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94() #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK1:       omp_offload.cont3:
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* @Arg, align 4
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK1:       omp_offload.cont4:
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, i32* @Arg, align 4
 // CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[ARG_CASTED]] to i32*
-// CHECK1-NEXT:    store i32 [[TMP4]], i32* [[CONV]], align 4
-// CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
-// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP7]], align 8
-// CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP9]], align 8
-// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT:    store i8* null, i8** [[TMP10]], align 8
-// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 [[TMP20]], i32* [[CONV]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
+// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP21]], i64* [[TMP23]], align 8
+// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP21]], i64* [[TMP25]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT:    store i8* null, i8** [[TMP26]], align 8
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP13:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103.region_id, i32 1, i8** [[TMP11]], i8** [[TMP12]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-// CHECK1-NEXT:    br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
-// CHECK1:       omp_offload.failed5:
-// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103(i64 [[TMP5]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT6]]
-// CHECK1:       omp_offload.cont6:
-// CHECK1-NEXT:    [[TMP15:%.*]] = load i32, i32* @Arg, align 4
-// CHECK1-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP15]])
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 1, i32* [[TMP30]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP33]], align 8
+// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP34]], align 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP35]], align 8
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP36]], align 8
+// CHECK1-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]])
+// CHECK1-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK1-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK1:       omp_offload.failed7:
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103(i64 [[TMP21]]) #[[ATTR2]]
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK1:       omp_offload.cont8:
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i32, i32* @Arg, align 4
+// CHECK1-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP39]])
 // CHECK1-NEXT:    ret i32 [[CALL]]
 //
 //
@@ -928,46 +1013,97 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
-// CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store i32 [[ARG]], i32* [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l63.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK1-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l63() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l69.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK1:       omp_offload.failed2:
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l69.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK1-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK1:       omp_offload.failed3:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l69() #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK1:       omp_offload.cont3:
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK1:       omp_offload.cont4:
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[ARG_CASTED]] to i32*
-// CHECK1-NEXT:    store i32 [[TMP4]], i32* [[CONV]], align 4
-// CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
-// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP7]], align 8
-// CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP9]], align 8
-// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT:    store i8* null, i8** [[TMP10]], align 8
-// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 [[TMP20]], i32* [[CONV]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i64, i64* [[ARG_CASTED]], align 8
+// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP21]], i64* [[TMP23]], align 8
+// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP21]], i64* [[TMP25]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT:    store i8* null, i8** [[TMP26]], align 8
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP13:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l75.region_id, i32 1, i8** [[TMP11]], i8** [[TMP12]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
-// CHECK1-NEXT:    br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
-// CHECK1:       omp_offload.failed5:
-// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l75(i64 [[TMP5]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT6]]
-// CHECK1:       omp_offload.cont6:
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 1, i32* [[TMP30]], align 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64** [[TMP33]], align 8
+// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i64** [[TMP34]], align 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP35]], align 8
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP36]], align 8
+// CHECK1-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l75.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS6]])
+// CHECK1-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK1-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK1:       omp_offload.failed7:
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l75(i64 [[TMP21]]) #[[ATTR2]]
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK1:       omp_offload.cont8:
 // CHECK1-NEXT:    ret i32 0
 //
 //

diff  --git a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
index 26648303aa733..e8c88d0f4e542 100644
--- a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
@@ -832,9 +832,26 @@ int main() {
 // CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
-// CHECK9-NEXT:    [[TMP36:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l123.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
-// CHECK9-NEXT:    br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP36]], align 4
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP37]], align 4
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP34]], i8*** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP35]], i8*** [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP41]], align 8
+// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP43]], align 8
+// CHECK9-NEXT:    [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l123.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK9-NEXT:    br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l123(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP4]], i64 [[TMP6]]) #[[ATTR4:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -842,18 +859,18 @@ int main() {
 // CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
 // CHECK9-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK9:       arraydestroy.body:
-// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK9-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK9-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK9-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK9:       arraydestroy.done3:
 // CHECK9-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK9-NEXT:    [[TMP39:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP39]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP47]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -1296,27 +1313,44 @@ int main() {
 // CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
-// CHECK9-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK9-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP30]], align 4
+// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 8
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64** [[TMP33]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i64** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP35]], align 8
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK9-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
 // CHECK9-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
 // CHECK9-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK9:       arraydestroy.body:
-// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK9-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK9-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK9-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK9:       arraydestroy.done2:
 // CHECK9-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP32]]
+// CHECK9-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP40]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
@@ -1835,9 +1869,26 @@ int main() {
 // CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
-// CHECK11-NEXT:    [[TMP36:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l123.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
-// CHECK11-NEXT:    br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP34]], i8*** [[TMP38]], align 4
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP35]], i8*** [[TMP39]], align 4
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP42]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l123.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK11-NEXT:    br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l123(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP4]], i32 [[TMP6]]) #[[ATTR4:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1845,18 +1896,18 @@ int main() {
 // CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK11-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
 // CHECK11-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK11:       arraydestroy.body:
-// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK11-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK11-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK11-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK11:       arraydestroy.done2:
 // CHECK11-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK11-NEXT:    [[TMP39:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP39]]
+// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP47]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -2290,27 +2341,44 @@ int main() {
 // CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
-// CHECK11-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK11-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP30]], align 4
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i64** [[TMP34]], align 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK11-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
 // CHECK11-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK11-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
 // CHECK11-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK11:       arraydestroy.body:
-// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK11-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK11-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK11-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK11:       arraydestroy.done2:
 // CHECK11-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP32]]
+// CHECK11-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP40]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev

diff  --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
index 346c8cf6e2343..c4fc3395debdf 100644
--- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
@@ -114,64 +114,98 @@ int main() {
 // CHECK1:       invoke.cont:
 // CHECK1-NEXT:    store i8 [[CALL]], i8* [[A]], align 1
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK1-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68() #[[ATTR6:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       lpad:
-// CHECK1-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// CHECK1-NEXT:    [[TMP10:%.*]] = landingpad { i8*, i32 }
 // CHECK1-NEXT:    cleanup
-// CHECK1-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
-// CHECK1-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8
-// CHECK1-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
-// CHECK1-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 0
+// CHECK1-NEXT:    store i8* [[TMP11]], i8** [[EXN_SLOT]], align 8
+// CHECK1-NEXT:    [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 1
+// CHECK1-NEXT:    store i32 [[TMP12]], i32* [[EHSELECTOR_SLOT]], align 4
 // CHECK1-NEXT:    call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[S]]) #[[ATTR6]]
 // CHECK1-NEXT:    br label [[EH_RESUME:%.*]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[TMP5:%.*]] = load i8, i8* [[A]], align 1
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i8, i8* [[A]], align 1
 // CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i8*
-// CHECK1-NEXT:    store i8 [[TMP5]], i8* [[CONV]], align 1
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i64, i64* [[A_CASTED]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP6]], i64* [[TMP8]], align 8
-// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP6]], i64* [[TMP10]], align 8
-// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT:    store i8* null, i8** [[TMP11]], align 8
-// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    store i8 [[TMP13]], i8* [[CONV]], align 1
+// CHECK1-NEXT:    [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP14]], i64* [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP14]], i64* [[TMP18]], align 8
+// CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT:    store i8* null, i8** [[TMP19]], align 8
+// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP14:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74.region_id, i32 1, i8** [[TMP12]], i8** [[TMP13]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
-// CHECK1-NEXT:    br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK1:       omp_offload.failed2:
-// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74(i64 [[TMP6]]) #[[ATTR6]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK1:       omp_offload.cont3:
-// CHECK1-NEXT:    [[TMP16:%.*]] = load i8, i8* [[A]], align 1
-// CHECK1-NEXT:    [[CONV4:%.*]] = sext i8 [[TMP16]] to i32
-// CHECK1-NEXT:    [[CALL6:%.*]] = invoke noundef i32 @_Z5tmainIcLi5EEiv()
-// CHECK1-NEXT:    to label [[INVOKE_CONT5:%.*]] unwind label [[LPAD]]
-// CHECK1:       invoke.cont5:
-// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV4]], [[CALL6]]
-// CHECK1-NEXT:    [[CALL8:%.*]] = invoke noundef i32 @_Z5tmainI1SLi1EEiv()
-// CHECK1-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD]]
-// CHECK1:       invoke.cont7:
-// CHECK1-NEXT:    [[ADD9:%.*]] = add nsw i32 [[ADD]], [[CALL8]]
-// CHECK1-NEXT:    store i32 [[ADD9]], i32* [[RETVAL]], align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP22]], align 4
+// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 1, i32* [[TMP23]], align 4
+// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** [[TMP21]], i8*** [[TMP25]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP26]], align 8
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP27]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP28]], align 8
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP29]], align 8
+// CHECK1-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK1-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK1-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK1:       omp_offload.failed3:
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74(i64 [[TMP14]]) #[[ATTR6]]
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK1:       omp_offload.cont4:
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i8, i8* [[A]], align 1
+// CHECK1-NEXT:    [[CONV5:%.*]] = sext i8 [[TMP32]] to i32
+// CHECK1-NEXT:    [[CALL7:%.*]] = invoke noundef i32 @_Z5tmainIcLi5EEiv()
+// CHECK1-NEXT:    to label [[INVOKE_CONT6:%.*]] unwind label [[LPAD]]
+// CHECK1:       invoke.cont6:
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV5]], [[CALL7]]
+// CHECK1-NEXT:    [[CALL9:%.*]] = invoke noundef i32 @_Z5tmainI1SLi1EEiv()
+// CHECK1-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD]]
+// CHECK1:       invoke.cont8:
+// CHECK1-NEXT:    [[ADD10:%.*]] = add nsw i32 [[ADD]], [[CALL9]]
+// CHECK1-NEXT:    store i32 [[ADD10]], i32* [[RETVAL]], align 4
 // CHECK1-NEXT:    call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[S]]) #[[ATTR6]]
-// CHECK1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK1-NEXT:    ret i32 [[TMP17]]
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK1-NEXT:    ret i32 [[TMP33]]
 // CHECK1:       eh.resume:
 // CHECK1-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
 // CHECK1-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
 // CHECK1-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
-// CHECK1-NEXT:    [[LPAD_VAL10:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
-// CHECK1-NEXT:    resume { i8*, i32 } [[LPAD_VAL10]]
+// CHECK1-NEXT:    [[LPAD_VAL11:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK1-NEXT:    resume { i8*, i32 } [[LPAD_VAL11]]
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1El
@@ -515,21 +549,55 @@ int main() {
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK1-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK1:       omp_offload.failed2:
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK1-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK1:       omp_offload.failed3:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57() #[[ATTR6]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK1:       omp_offload.cont3:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK1:       omp_offload.cont4:
 // CHECK1-NEXT:    ret i32 0
 //
 //
@@ -539,21 +607,55 @@ int main() {
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK1-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK1:       omp_offload.failed2:
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK1-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK1:       omp_offload.failed3:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57() #[[ATTR6]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK1:       omp_offload.cont3:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK1:       omp_offload.cont4:
 // CHECK1-NEXT:    ret i32 0
 //
 //
@@ -1221,64 +1323,98 @@ int main() {
 // CHECK5:       invoke.cont:
 // CHECK5-NEXT:    store i8 [[CALL]], i8* [[A]], align 1
 // CHECK5-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 100)
-// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK5-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK5-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK5-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK5-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK5-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK5-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK5-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK5-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK5-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK5-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK5-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK5-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK5-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK5-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK5-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK5:       omp_offload.failed:
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68() #[[ATTR6:[0-9]+]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK5:       lpad:
-// CHECK5-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// CHECK5-NEXT:    [[TMP10:%.*]] = landingpad { i8*, i32 }
 // CHECK5-NEXT:    cleanup
-// CHECK5-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
-// CHECK5-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8
-// CHECK5-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
-// CHECK5-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK5-NEXT:    [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 0
+// CHECK5-NEXT:    store i8* [[TMP11]], i8** [[EXN_SLOT]], align 8
+// CHECK5-NEXT:    [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 1
+// CHECK5-NEXT:    store i32 [[TMP12]], i32* [[EHSELECTOR_SLOT]], align 4
 // CHECK5-NEXT:    call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[S]]) #[[ATTR6]]
 // CHECK5-NEXT:    br label [[EH_RESUME:%.*]]
 // CHECK5:       omp_offload.cont:
-// CHECK5-NEXT:    [[TMP5:%.*]] = load i8, i8* [[A]], align 1
+// CHECK5-NEXT:    [[TMP13:%.*]] = load i8, i8* [[A]], align 1
 // CHECK5-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i8*
-// CHECK5-NEXT:    store i8 [[TMP5]], i8* [[CONV]], align 1
-// CHECK5-NEXT:    [[TMP6:%.*]] = load i64, i64* [[A_CASTED]], align 8
-// CHECK5-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP6]], i64* [[TMP8]], align 8
-// CHECK5-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP6]], i64* [[TMP10]], align 8
-// CHECK5-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK5-NEXT:    store i8* null, i8** [[TMP11]], align 8
-// CHECK5-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK5-NEXT:    store i8 [[TMP13]], i8* [[CONV]], align 1
+// CHECK5-NEXT:    [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8
+// CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP14]], i64* [[TMP16]], align 8
+// CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP14]], i64* [[TMP18]], align 8
+// CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK5-NEXT:    store i8* null, i8** [[TMP19]], align 8
+// CHECK5-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK5-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK5-NEXT:    [[TMP14:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74.region_id, i32 1, i8** [[TMP12]], i8** [[TMP13]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK5-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
-// CHECK5-NEXT:    br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK5:       omp_offload.failed2:
-// CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74(i64 [[TMP6]]) #[[ATTR6]]
-// CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK5:       omp_offload.cont3:
-// CHECK5-NEXT:    [[TMP16:%.*]] = load i8, i8* [[A]], align 1
-// CHECK5-NEXT:    [[CONV4:%.*]] = sext i8 [[TMP16]] to i32
-// CHECK5-NEXT:    [[CALL6:%.*]] = invoke noundef i32 @_Z5tmainIcLi5EEiv()
-// CHECK5-NEXT:    to label [[INVOKE_CONT5:%.*]] unwind label [[LPAD]]
-// CHECK5:       invoke.cont5:
-// CHECK5-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV4]], [[CALL6]]
-// CHECK5-NEXT:    [[CALL8:%.*]] = invoke noundef i32 @_Z5tmainI1SLi1EEiv()
-// CHECK5-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD]]
-// CHECK5:       invoke.cont7:
-// CHECK5-NEXT:    [[ADD9:%.*]] = add nsw i32 [[ADD]], [[CALL8]]
-// CHECK5-NEXT:    store i32 [[ADD9]], i32* [[RETVAL]], align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK5-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK5-NEXT:    store i32 1, i32* [[TMP22]], align 4
+// CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK5-NEXT:    store i32 1, i32* [[TMP23]], align 4
+// CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK5-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
+// CHECK5-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK5-NEXT:    store i8** [[TMP21]], i8*** [[TMP25]], align 8
+// CHECK5-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK5-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP26]], align 8
+// CHECK5-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK5-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP27]], align 8
+// CHECK5-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP28]], align 8
+// CHECK5-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP29]], align 8
+// CHECK5-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK5-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK5-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK5:       omp_offload.failed3:
+// CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74(i64 [[TMP14]]) #[[ATTR6]]
+// CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK5:       omp_offload.cont4:
+// CHECK5-NEXT:    [[TMP32:%.*]] = load i8, i8* [[A]], align 1
+// CHECK5-NEXT:    [[CONV5:%.*]] = sext i8 [[TMP32]] to i32
+// CHECK5-NEXT:    [[CALL7:%.*]] = invoke noundef i32 @_Z5tmainIcLi5EEiv()
+// CHECK5-NEXT:    to label [[INVOKE_CONT6:%.*]] unwind label [[LPAD]]
+// CHECK5:       invoke.cont6:
+// CHECK5-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV5]], [[CALL7]]
+// CHECK5-NEXT:    [[CALL9:%.*]] = invoke noundef i32 @_Z5tmainI1SLi1EEiv()
+// CHECK5-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD]]
+// CHECK5:       invoke.cont8:
+// CHECK5-NEXT:    [[ADD10:%.*]] = add nsw i32 [[ADD]], [[CALL9]]
+// CHECK5-NEXT:    store i32 [[ADD10]], i32* [[RETVAL]], align 4
 // CHECK5-NEXT:    call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[S]]) #[[ATTR6]]
-// CHECK5-NEXT:    [[TMP17:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK5-NEXT:    ret i32 [[TMP17]]
+// CHECK5-NEXT:    [[TMP33:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK5-NEXT:    ret i32 [[TMP33]]
 // CHECK5:       eh.resume:
 // CHECK5-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
 // CHECK5-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
 // CHECK5-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
-// CHECK5-NEXT:    [[LPAD_VAL10:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
-// CHECK5-NEXT:    resume { i8*, i32 } [[LPAD_VAL10]]
+// CHECK5-NEXT:    [[LPAD_VAL11:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK5-NEXT:    resume { i8*, i32 } [[LPAD_VAL11]]
 //
 //
 // CHECK5-LABEL: define {{[^@]+}}@_ZN1SC1El
@@ -1622,21 +1758,55 @@ int main() {
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK5-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK5-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK5-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK5-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK5-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK5-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK5-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK5-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK5-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK5-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK5-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK5-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK5-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK5-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK5-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK5:       omp_offload.failed:
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK5:       omp_offload.cont:
 // CHECK5-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK5-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK5-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK5-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK5:       omp_offload.failed2:
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK5-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK5-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK5-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK5-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK5-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK5-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK5-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK5-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK5-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK5-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK5-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK5-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK5:       omp_offload.failed3:
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57() #[[ATTR6]]
-// CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK5:       omp_offload.cont3:
+// CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK5:       omp_offload.cont4:
 // CHECK5-NEXT:    ret i32 0
 //
 //
@@ -1646,21 +1816,55 @@ int main() {
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK5-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK5-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK5-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK5-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK5-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK5-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK5-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK5-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK5-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK5-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK5-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK5-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK5-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK5-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK5-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK5:       omp_offload.failed:
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK5:       omp_offload.cont:
 // CHECK5-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK5-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK5-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK5-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK5:       omp_offload.failed2:
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK5-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK5-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK5-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK5-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK5-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK5-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK5-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK5-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK5-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK5-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK5-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK5-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK5-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK5:       omp_offload.failed3:
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57() #[[ATTR6]]
-// CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK5:       omp_offload.cont3:
+// CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK5:       omp_offload.cont4:
 // CHECK5-NEXT:    ret i32 0
 //
 //
@@ -2328,64 +2532,98 @@ int main() {
 // CHECK9:       invoke.cont:
 // CHECK9-NEXT:    store i8 [[CALL]], i8* [[A]], align 1
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 100)
-// CHECK9-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK9-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK9-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK9-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK9-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68() #[[ATTR6:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       lpad:
-// CHECK9-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// CHECK9-NEXT:    [[TMP10:%.*]] = landingpad { i8*, i32 }
 // CHECK9-NEXT:    cleanup
-// CHECK9-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
-// CHECK9-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8
-// CHECK9-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
-// CHECK9-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK9-NEXT:    [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 0
+// CHECK9-NEXT:    store i8* [[TMP11]], i8** [[EXN_SLOT]], align 8
+// CHECK9-NEXT:    [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 1
+// CHECK9-NEXT:    store i32 [[TMP12]], i32* [[EHSELECTOR_SLOT]], align 4
 // CHECK9-NEXT:    call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[S]]) #[[ATTR6]]
 // CHECK9-NEXT:    br label [[EH_RESUME:%.*]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP5:%.*]] = load i8, i8* [[A]], align 1
+// CHECK9-NEXT:    [[TMP13:%.*]] = load i8, i8* [[A]], align 1
 // CHECK9-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i8*
-// CHECK9-NEXT:    store i8 [[TMP5]], i8* [[CONV]], align 1
-// CHECK9-NEXT:    [[TMP6:%.*]] = load i64, i64* [[A_CASTED]], align 8
-// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP6]], i64* [[TMP8]], align 8
-// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP6]], i64* [[TMP10]], align 8
-// CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
-// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    store i8 [[TMP13]], i8* [[CONV]], align 1
+// CHECK9-NEXT:    [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP14]], i64* [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP14]], i64* [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP19]], align 8
+// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK9-NEXT:    [[TMP14:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74.region_id, i32 1, i8** [[TMP12]], i8** [[TMP13]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
-// CHECK9-NEXT:    br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK9:       omp_offload.failed2:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74(i64 [[TMP6]]) #[[ATTR6]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK9:       omp_offload.cont3:
-// CHECK9-NEXT:    [[TMP16:%.*]] = load i8, i8* [[A]], align 1
-// CHECK9-NEXT:    [[CONV4:%.*]] = sext i8 [[TMP16]] to i32
-// CHECK9-NEXT:    [[CALL6:%.*]] = invoke noundef i32 @_Z5tmainIcLi5EEiv()
-// CHECK9-NEXT:    to label [[INVOKE_CONT5:%.*]] unwind label [[LPAD]]
-// CHECK9:       invoke.cont5:
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV4]], [[CALL6]]
-// CHECK9-NEXT:    [[CALL8:%.*]] = invoke noundef i32 @_Z5tmainI1SLi1EEiv()
-// CHECK9-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD]]
-// CHECK9:       invoke.cont7:
-// CHECK9-NEXT:    [[ADD9:%.*]] = add nsw i32 [[ADD]], [[CALL8]]
-// CHECK9-NEXT:    store i32 [[ADD9]], i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP22]], align 4
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 1, i32* [[TMP23]], align 4
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP21]], i8*** [[TMP25]], align 8
+// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP26]], align 8
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP27]], align 8
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP28]], align 8
+// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP29]], align 8
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK9:       omp_offload.failed3:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74(i64 [[TMP14]]) #[[ATTR6]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK9:       omp_offload.cont4:
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i8, i8* [[A]], align 1
+// CHECK9-NEXT:    [[CONV5:%.*]] = sext i8 [[TMP32]] to i32
+// CHECK9-NEXT:    [[CALL7:%.*]] = invoke noundef i32 @_Z5tmainIcLi5EEiv()
+// CHECK9-NEXT:    to label [[INVOKE_CONT6:%.*]] unwind label [[LPAD]]
+// CHECK9:       invoke.cont6:
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV5]], [[CALL7]]
+// CHECK9-NEXT:    [[CALL9:%.*]] = invoke noundef i32 @_Z5tmainI1SLi1EEiv()
+// CHECK9-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD]]
+// CHECK9:       invoke.cont8:
+// CHECK9-NEXT:    [[ADD10:%.*]] = add nsw i32 [[ADD]], [[CALL9]]
+// CHECK9-NEXT:    store i32 [[ADD10]], i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[S]]) #[[ATTR6]]
-// CHECK9-NEXT:    [[TMP17:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP17]]
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP33]]
 // CHECK9:       eh.resume:
 // CHECK9-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
 // CHECK9-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
 // CHECK9-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
-// CHECK9-NEXT:    [[LPAD_VAL10:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
-// CHECK9-NEXT:    resume { i8*, i32 } [[LPAD_VAL10]]
+// CHECK9-NEXT:    [[LPAD_VAL11:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK9-NEXT:    resume { i8*, i32 } [[LPAD_VAL11]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_ZN1SC1El
@@ -2729,21 +2967,55 @@ int main() {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK9-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK9-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK9-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK9-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK9-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK9-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK9-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK9:       omp_offload.failed2:
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK9-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK9-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK9-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK9:       omp_offload.failed3:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57() #[[ATTR6]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK9:       omp_offload.cont3:
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK9:       omp_offload.cont4:
 // CHECK9-NEXT:    ret i32 0
 //
 //
@@ -2753,21 +3025,55 @@ int main() {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK9-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK9-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK9-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK9-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK9-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK9-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK9-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK9:       omp_offload.failed2:
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK9-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK9-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK9-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK9:       omp_offload.failed3:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57() #[[ATTR6]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK9:       omp_offload.cont3:
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK9:       omp_offload.cont4:
 // CHECK9-NEXT:    ret i32 0
 //
 //
@@ -3435,64 +3741,98 @@ int main() {
 // CHECK13:       invoke.cont:
 // CHECK13-NEXT:    store i8 [[CALL]], i8* [[A]], align 1
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 100)
-// CHECK13-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK13-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK13-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK13-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK13-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK13-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK13-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK13-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK13-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK13-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK13-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK13-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK13-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK13-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68() #[[ATTR6:[0-9]+]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       lpad:
-// CHECK13-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+// CHECK13-NEXT:    [[TMP10:%.*]] = landingpad { i8*, i32 }
 // CHECK13-NEXT:    cleanup
-// CHECK13-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
-// CHECK13-NEXT:    store i8* [[TMP3]], i8** [[EXN_SLOT]], align 8
-// CHECK13-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
-// CHECK13-NEXT:    store i32 [[TMP4]], i32* [[EHSELECTOR_SLOT]], align 4
+// CHECK13-NEXT:    [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 0
+// CHECK13-NEXT:    store i8* [[TMP11]], i8** [[EXN_SLOT]], align 8
+// CHECK13-NEXT:    [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 1
+// CHECK13-NEXT:    store i32 [[TMP12]], i32* [[EHSELECTOR_SLOT]], align 4
 // CHECK13-NEXT:    call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[S]]) #[[ATTR6]]
 // CHECK13-NEXT:    br label [[EH_RESUME:%.*]]
 // CHECK13:       omp_offload.cont:
-// CHECK13-NEXT:    [[TMP5:%.*]] = load i8, i8* [[A]], align 1
+// CHECK13-NEXT:    [[TMP13:%.*]] = load i8, i8* [[A]], align 1
 // CHECK13-NEXT:    [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i8*
-// CHECK13-NEXT:    store i8 [[TMP5]], i8* [[CONV]], align 1
-// CHECK13-NEXT:    [[TMP6:%.*]] = load i64, i64* [[A_CASTED]], align 8
-// CHECK13-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP6]], i64* [[TMP8]], align 8
-// CHECK13-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP6]], i64* [[TMP10]], align 8
-// CHECK13-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP11]], align 8
-// CHECK13-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK13-NEXT:    store i8 [[TMP13]], i8* [[CONV]], align 1
+// CHECK13-NEXT:    [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8
+// CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP14]], i64* [[TMP16]], align 8
+// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP14]], i64* [[TMP18]], align 8
+// CHECK13-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP19]], align 8
+// CHECK13-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK13-NEXT:    [[TMP14:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74.region_id, i32 1, i8** [[TMP12]], i8** [[TMP13]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
-// CHECK13-NEXT:    br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK13:       omp_offload.failed2:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74(i64 [[TMP6]]) #[[ATTR6]]
-// CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK13:       omp_offload.cont3:
-// CHECK13-NEXT:    [[TMP16:%.*]] = load i8, i8* [[A]], align 1
-// CHECK13-NEXT:    [[CONV4:%.*]] = sext i8 [[TMP16]] to i32
-// CHECK13-NEXT:    [[CALL6:%.*]] = invoke noundef i32 @_Z5tmainIcLi5EEiv()
-// CHECK13-NEXT:    to label [[INVOKE_CONT5:%.*]] unwind label [[LPAD]]
-// CHECK13:       invoke.cont5:
-// CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV4]], [[CALL6]]
-// CHECK13-NEXT:    [[CALL8:%.*]] = invoke noundef i32 @_Z5tmainI1SLi1EEiv()
-// CHECK13-NEXT:    to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD]]
-// CHECK13:       invoke.cont7:
-// CHECK13-NEXT:    [[ADD9:%.*]] = add nsw i32 [[ADD]], [[CALL8]]
-// CHECK13-NEXT:    store i32 [[ADD9]], i32* [[RETVAL]], align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK13-NEXT:    store i32 1, i32* [[TMP22]], align 4
+// CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK13-NEXT:    store i32 1, i32* [[TMP23]], align 4
+// CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK13-NEXT:    store i8** [[TMP20]], i8*** [[TMP24]], align 8
+// CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK13-NEXT:    store i8** [[TMP21]], i8*** [[TMP25]], align 8
+// CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK13-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP26]], align 8
+// CHECK13-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK13-NEXT:    store i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP27]], align 8
+// CHECK13-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP28]], align 8
+// CHECK13-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP29]], align 8
+// CHECK13-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK13-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK13-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK13:       omp_offload.failed3:
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74(i64 [[TMP14]]) #[[ATTR6]]
+// CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK13:       omp_offload.cont4:
+// CHECK13-NEXT:    [[TMP32:%.*]] = load i8, i8* [[A]], align 1
+// CHECK13-NEXT:    [[CONV5:%.*]] = sext i8 [[TMP32]] to i32
+// CHECK13-NEXT:    [[CALL7:%.*]] = invoke noundef i32 @_Z5tmainIcLi5EEiv()
+// CHECK13-NEXT:    to label [[INVOKE_CONT6:%.*]] unwind label [[LPAD]]
+// CHECK13:       invoke.cont6:
+// CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV5]], [[CALL7]]
+// CHECK13-NEXT:    [[CALL9:%.*]] = invoke noundef i32 @_Z5tmainI1SLi1EEiv()
+// CHECK13-NEXT:    to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD]]
+// CHECK13:       invoke.cont8:
+// CHECK13-NEXT:    [[ADD10:%.*]] = add nsw i32 [[ADD]], [[CALL9]]
+// CHECK13-NEXT:    store i32 [[ADD10]], i32* [[RETVAL]], align 4
 // CHECK13-NEXT:    call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[S]]) #[[ATTR6]]
-// CHECK13-NEXT:    [[TMP17:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK13-NEXT:    ret i32 [[TMP17]]
+// CHECK13-NEXT:    [[TMP33:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK13-NEXT:    ret i32 [[TMP33]]
 // CHECK13:       eh.resume:
 // CHECK13-NEXT:    [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8
 // CHECK13-NEXT:    [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4
 // CHECK13-NEXT:    [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0
-// CHECK13-NEXT:    [[LPAD_VAL10:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
-// CHECK13-NEXT:    resume { i8*, i32 } [[LPAD_VAL10]]
+// CHECK13-NEXT:    [[LPAD_VAL11:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1
+// CHECK13-NEXT:    resume { i8*, i32 } [[LPAD_VAL11]]
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@_ZN1SC1El
@@ -3836,21 +4176,55 @@ int main() {
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK13-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK13-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK13-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK13-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK13-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK13-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK13-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK13-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK13-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK13-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK13-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK13-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK13-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK13-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       omp_offload.cont:
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK13-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK13-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK13:       omp_offload.failed2:
+// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK13-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK13-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK13-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK13-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK13-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK13-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK13-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK13-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK13-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK13-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK13-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK13-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK13:       omp_offload.failed3:
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57() #[[ATTR6]]
-// CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK13:       omp_offload.cont3:
+// CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK13:       omp_offload.cont4:
 // CHECK13-NEXT:    ret i32 0
 //
 //
@@ -3860,21 +4234,55 @@ int main() {
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK13-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK13-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK13-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK13-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK13-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK13-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK13-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK13-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK13-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK13-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK13-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK13-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK13-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK13-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       omp_offload.cont:
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 100)
-// CHECK13-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK13-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK13:       omp_offload.failed2:
+// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK13-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK13-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK13-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK13-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK13-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK13-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK13-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK13-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK13-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK13-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK13-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK13-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK13-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK13:       omp_offload.failed3:
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57() #[[ATTR6]]
-// CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK13:       omp_offload.cont3:
+// CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK13:       omp_offload.cont4:
 // CHECK13-NEXT:    ret i32 0
 //
 //

diff  --git a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
index 528b70ce244fa..25e889e4fc278 100644
--- a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
@@ -535,9 +535,26 @@ int main() {
 // CHECK9-NEXT:    store %struct.S* [[TEST]], %struct.S** [[VAR]], align 8
 // CHECK9-NEXT:    store %struct.S* undef, %struct.S** [[_TMP1]], align 8
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
-// CHECK9-NEXT:    [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-// CHECK9-NEXT:    br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP1]], align 4
+// CHECK9-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 0, i32* [[TMP2]], align 4
+// CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP4]], align 8
+// CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* null, i64** [[TMP6]], align 8
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+// CHECK9-NEXT:    br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95() #[[ATTR4:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -545,18 +562,18 @@ int main() {
 // CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
+// CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
 // CHECK9-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK9:       arraydestroy.body:
-// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK9-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK9-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK9-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK9:       arraydestroy.done2:
 // CHECK9-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK9-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP4]]
+// CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP12]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -833,27 +850,44 @@ int main() {
 // CHECK9-NEXT:    store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8
 // CHECK9-NEXT:    store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
-// CHECK9-NEXT:    [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-// CHECK9-NEXT:    br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP1]], align 4
+// CHECK9-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 0, i32* [[TMP2]], align 4
+// CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP4]], align 8
+// CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* null, i64** [[TMP6]], align 8
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+// CHECK9-NEXT:    br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49() #[[ATTR4]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
 // CHECK9-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
 // CHECK9-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK9:       arraydestroy.body:
-// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK9-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK9-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK9-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK9-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK9:       arraydestroy.done2:
 // CHECK9-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK9-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP4]]
+// CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP12]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
@@ -1207,9 +1241,26 @@ int main() {
 // CHECK11-NEXT:    store %struct.S* [[TEST]], %struct.S** [[VAR]], align 4
 // CHECK11-NEXT:    store %struct.S* undef, %struct.S** [[_TMP1]], align 4
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
-// CHECK11-NEXT:    [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-// CHECK11-NEXT:    br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP1]], align 4
+// CHECK11-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 0, i32* [[TMP2]], align 4
+// CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP3]], align 4
+// CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP4]], align 4
+// CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* null, i64** [[TMP5]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* null, i64** [[TMP6]], align 4
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP7]], align 4
+// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+// CHECK11-NEXT:    br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95() #[[ATTR4:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1217,18 +1268,18 @@ int main() {
 // CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK11-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
+// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
 // CHECK11-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK11:       arraydestroy.body:
-// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK11-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK11-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK11-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK11:       arraydestroy.done2:
 // CHECK11-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP4]]
+// CHECK11-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP12]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -1499,27 +1550,44 @@ int main() {
 // CHECK11-NEXT:    store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4
 // CHECK11-NEXT:    store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
-// CHECK11-NEXT:    [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-// CHECK11-NEXT:    br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP1]], align 4
+// CHECK11-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 0, i32* [[TMP2]], align 4
+// CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP3]], align 4
+// CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP4]], align 4
+// CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* null, i64** [[TMP5]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* null, i64** [[TMP6]], align 4
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP7]], align 4
+// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
+// CHECK11-NEXT:    br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49() #[[ATTR4]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
 // CHECK11-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK11-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
+// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
 // CHECK11-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK11:       arraydestroy.body:
-// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK11-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK11-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK11-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK11-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK11:       arraydestroy.done2:
 // CHECK11-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP4]]
+// CHECK11-NEXT:    [[TMP12:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP12]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev

diff  --git a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
index 746ecc3efe07d..4d6570de7a42b 100644
--- a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
@@ -61,21 +61,55 @@ int main() {
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 1000)
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK1-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 1000)
-// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
-// CHECK1:       omp_offload.failed2:
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP10]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP11]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP13]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP15]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP17]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS2]])
+// CHECK1-NEXT:    [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CHECK1-NEXT:    br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
+// CHECK1:       omp_offload.failed3:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41() #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT3]]
-// CHECK1:       omp_offload.cont3:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT4]]
+// CHECK1:       omp_offload.cont4:
 // CHECK1-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
 // CHECK1-NEXT:    ret i32 [[CALL]]
 //
@@ -361,9 +395,26 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 1000)
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 1, i32* [[TMP0]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 0, i32* [[TMP1]], align 4
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store i64* null, i64** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store i64* null, i64** [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store i8** null, i8*** [[TMP7]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK1-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]

diff  --git a/clang/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp
index 34d328c9cae04..15aff901fcfa5 100644
--- a/clang/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_reduction_codegen.cpp
@@ -63,7 +63,7 @@ int main() {
 }
 
 // CHECK-LABEL: main
-// CHECK: call{{.+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
+// CHECK: call{{.+}} @__tgt_target_kernel(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFL:@.+]](
 // CHECK: call{{.+}} [[TMAIN:@.+]](i{{32|64}}
 // CHECK: ret
@@ -82,7 +82,7 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define{{.+}} [[TMAIN]](i{{32|64}}
-// CHECK: call{{.+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
+// CHECK: call{{.+}} @__tgt_target_kernel(%struct.ident_t* @{{.+}},
 // CHECK: call void [[TOFFL:@.+]](
 // CHECK: ret
 

diff  --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
index 8736aa7dd335a..f41e4677816e3 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -4700,43 +4700,43 @@ int main() {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[CH_CASTED:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS21:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS21:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS22:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS23:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[N_CASTED32:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS36:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP37:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_38:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[CH_CASTED46:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED48:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS51:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS52:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP53:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_54:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[N_CASTED62:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS64:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS65:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS66:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP67:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_68:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_69:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[CH_CASTED76:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED78:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS80:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS81:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS82:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP83:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_84:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_85:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED34:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS36:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS37:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS38:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[CH_CASTED49:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[N_CASTED51:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS53:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS54:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS55:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED66:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS68:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS69:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS70:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP71:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_72:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_73:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[CH_CASTED81:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[N_CASTED83:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS85:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS86:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS87:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP88:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_89:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_90:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 10000, i32* [[N]], align 4
 // CHECK9-NEXT:    store i32 100, i32* [[CH]], align 4
@@ -4792,403 +4792,522 @@ int main() {
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
 // CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l368.region_id, i32 4, i8** [[TMP25]], i8** [[TMP26]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP31]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP32]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP25]], i8*** [[TMP33]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP26]], i8*** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP35]], align 8
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l368.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
+// CHECK9-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l368(i64 [[TMP1]], double* [[TMP2]], double* [[TMP3]], double* [[TMP4]]) #[[ATTR2:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP37:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP42]], align 8
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to double**
-// CHECK9-NEXT:    store double* [[TMP35]], double** [[TMP44]], align 8
-// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to double**
-// CHECK9-NEXT:    store double* [[TMP35]], double** [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to double**
-// CHECK9-NEXT:    store double* [[TMP36]], double** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to double**
-// CHECK9-NEXT:    store double* [[TMP36]], double** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 3
+// CHECK9-NEXT:    store i32 [[TMP41]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP44:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP42]], i64* [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP42]], i64* [[TMP49]], align 8
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to double**
+// CHECK9-NEXT:    store double* [[TMP43]], double** [[TMP52]], align 8
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to double**
-// CHECK9-NEXT:    store double* [[TMP37]], double** [[TMP54]], align 8
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP56:%.*]] = bitcast i8** [[TMP55]] to double**
-// CHECK9-NEXT:    store double* [[TMP37]], double** [[TMP56]], align 8
-// CHECK9-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP57]], align 8
-// CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP60:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP60]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK9-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP61]], 0
+// CHECK9-NEXT:    store double* [[TMP43]], double** [[TMP54]], align 8
+// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP55]], align 8
+// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to double**
+// CHECK9-NEXT:    store double* [[TMP44]], double** [[TMP57]], align 8
+// CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to double**
+// CHECK9-NEXT:    store double* [[TMP44]], double** [[TMP59]], align 8
+// CHECK9-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP60]], align 8
+// CHECK9-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to double**
+// CHECK9-NEXT:    store double* [[TMP45]], double** [[TMP62]], align 8
+// CHECK9-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to double**
+// CHECK9-NEXT:    store double* [[TMP45]], double** [[TMP64]], align 8
+// CHECK9-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP65]], align 8
+// CHECK9-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP68]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK9-NEXT:    [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK9-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP69]], 0
 // CHECK9-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK9-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK9-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1
-// CHECK9-NEXT:    [[TMP63:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP63]])
-// CHECK9-NEXT:    [[TMP64:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l407.region_id, i32 4, i8** [[TMP58]], i8** [[TMP59]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP65:%.*]] = icmp ne i32 [[TMP64]], 0
-// CHECK9-NEXT:    br i1 [[TMP65]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
-// CHECK9:       omp_offload.failed15:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l407(i64 [[TMP34]], double* [[TMP35]], double* [[TMP36]], double* [[TMP37]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
-// CHECK9:       omp_offload.cont16:
-// CHECK9-NEXT:    [[TMP66:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV17:%.*]] = bitcast i64* [[CH_CASTED]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP66]], i32* [[CONV17]], align 4
-// CHECK9-NEXT:    [[TMP67:%.*]] = load i64, i64* [[CH_CASTED]], align 8
-// CHECK9-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV19:%.*]] = bitcast i64* [[N_CASTED18]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP68]], i32* [[CONV19]], align 4
-// CHECK9-NEXT:    [[TMP69:%.*]] = load i64, i64* [[N_CASTED18]], align 8
-// CHECK9-NEXT:    [[TMP70:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP71:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP72:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP74]], align 8
-// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP76]], align 8
-// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP77]], align 8
-// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP69]], i64* [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP69]], i64* [[TMP81]], align 8
-// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP82]], align 8
-// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to double**
-// CHECK9-NEXT:    store double* [[TMP70]], double** [[TMP84]], align 8
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to double**
-// CHECK9-NEXT:    store double* [[TMP70]], double** [[TMP86]], align 8
-// CHECK9-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP87]], align 8
-// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
-// CHECK9-NEXT:    store double* [[TMP71]], double** [[TMP89]], align 8
-// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to double**
-// CHECK9-NEXT:    store double* [[TMP71]], double** [[TMP91]], align 8
-// CHECK9-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK9-NEXT:    store double* [[TMP72]], double** [[TMP94]], align 8
-// CHECK9-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to double**
-// CHECK9-NEXT:    store double* [[TMP72]], double** [[TMP96]], align 8
-// CHECK9-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP97]], align 8
-// CHECK9-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP100]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK9-NEXT:    [[TMP101:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK9-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP101]], 0
-// CHECK9-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
-// CHECK9-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
-// CHECK9-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[TMP102:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP102]], 1
-// CHECK9-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP103]])
-// CHECK9-NEXT:    [[TMP104:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l446.region_id, i32 5, i8** [[TMP98]], i8** [[TMP99]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP105:%.*]] = icmp ne i32 [[TMP104]], 0
-// CHECK9-NEXT:    br i1 [[TMP105]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
-// CHECK9:       omp_offload.failed30:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l446(i64 [[TMP67]], i64 [[TMP69]], double* [[TMP70]], double* [[TMP71]], double* [[TMP72]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
-// CHECK9:       omp_offload.cont31:
-// CHECK9-NEXT:    [[TMP106:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV33:%.*]] = bitcast i64* [[N_CASTED32]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP106]], i32* [[CONV33]], align 4
-// CHECK9-NEXT:    [[TMP107:%.*]] = load i64, i64* [[N_CASTED32]], align 8
-// CHECK9-NEXT:    [[TMP108:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP109:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP110:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP107]], i64* [[TMP112]], align 8
-// CHECK9-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP107]], i64* [[TMP114]], align 8
-// CHECK9-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK9-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to double**
-// CHECK9-NEXT:    store double* [[TMP108]], double** [[TMP117]], align 8
-// CHECK9-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to double**
-// CHECK9-NEXT:    store double* [[TMP108]], double** [[TMP119]], align 8
-// CHECK9-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP120]], align 8
-// CHECK9-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to double**
-// CHECK9-NEXT:    store double* [[TMP109]], double** [[TMP122]], align 8
-// CHECK9-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to double**
-// CHECK9-NEXT:    store double* [[TMP109]], double** [[TMP124]], align 8
-// CHECK9-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP125]], align 8
-// CHECK9-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
-// CHECK9-NEXT:    store double* [[TMP110]], double** [[TMP127]], align 8
-// CHECK9-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to double**
-// CHECK9-NEXT:    store double* [[TMP110]], double** [[TMP129]], align 8
-// CHECK9-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK9-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK9-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK9-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP134]], 0
-// CHECK9-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
-// CHECK9-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
-// CHECK9-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK9-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK9-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP135]], 1
-// CHECK9-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP136]])
-// CHECK9-NEXT:    [[TMP137:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l477.region_id, i32 4, i8** [[TMP131]], i8** [[TMP132]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP138:%.*]] = icmp ne i32 [[TMP137]], 0
-// CHECK9-NEXT:    br i1 [[TMP138]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
-// CHECK9:       omp_offload.failed44:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l477(i64 [[TMP107]], double* [[TMP108]], double* [[TMP109]], double* [[TMP110]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
-// CHECK9:       omp_offload.cont45:
-// CHECK9-NEXT:    [[TMP139:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV47:%.*]] = bitcast i64* [[CH_CASTED46]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP139]], i32* [[CONV47]], align 4
-// CHECK9-NEXT:    [[TMP140:%.*]] = load i64, i64* [[CH_CASTED46]], align 8
-// CHECK9-NEXT:    [[TMP141:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV49:%.*]] = bitcast i64* [[N_CASTED48]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP141]], i32* [[CONV49]], align 4
-// CHECK9-NEXT:    [[TMP142:%.*]] = load i64, i64* [[N_CASTED48]], align 8
-// CHECK9-NEXT:    [[TMP143:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP144:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP145:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP140]], i64* [[TMP147]], align 8
-// CHECK9-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP140]], i64* [[TMP149]], align 8
-// CHECK9-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP150]], align 8
-// CHECK9-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP142]], i64* [[TMP152]], align 8
-// CHECK9-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP142]], i64* [[TMP154]], align 8
-// CHECK9-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK9-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to double**
-// CHECK9-NEXT:    store double* [[TMP143]], double** [[TMP157]], align 8
-// CHECK9-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to double**
-// CHECK9-NEXT:    store double* [[TMP143]], double** [[TMP159]], align 8
-// CHECK9-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP160]], align 8
-// CHECK9-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to double**
-// CHECK9-NEXT:    store double* [[TMP144]], double** [[TMP162]], align 8
-// CHECK9-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to double**
-// CHECK9-NEXT:    store double* [[TMP144]], double** [[TMP164]], align 8
-// CHECK9-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP165]], align 8
-// CHECK9-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP167:%.*]] = bitcast i8** [[TMP166]] to double**
-// CHECK9-NEXT:    store double* [[TMP145]], double** [[TMP167]], align 8
-// CHECK9-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP169:%.*]] = bitcast i8** [[TMP168]] to double**
-// CHECK9-NEXT:    store double* [[TMP145]], double** [[TMP169]], align 8
-// CHECK9-NEXT:    [[TMP170:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP170]], align 8
-// CHECK9-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP70]], 1
+// CHECK9-NEXT:    [[TMP71:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP71]])
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP72]], align 4
+// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP73]], align 4
+// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP66]], i8*** [[TMP74]], align 8
+// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP67]], i8*** [[TMP75]], align 8
+// CHECK9-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64** [[TMP76]], align 8
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i64** [[TMP77]], align 8
+// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP78]], align 8
+// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP79]], align 8
+// CHECK9-NEXT:    [[TMP80:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l407.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]])
+// CHECK9-NEXT:    [[TMP81:%.*]] = icmp ne i32 [[TMP80]], 0
+// CHECK9-NEXT:    br i1 [[TMP81]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9:       omp_offload.failed16:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l407(i64 [[TMP42]], double* [[TMP43]], double* [[TMP44]], double* [[TMP45]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
+// CHECK9:       omp_offload.cont17:
+// CHECK9-NEXT:    [[TMP82:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV18:%.*]] = bitcast i64* [[CH_CASTED]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP82]], i32* [[CONV18]], align 4
+// CHECK9-NEXT:    [[TMP83:%.*]] = load i64, i64* [[CH_CASTED]], align 8
+// CHECK9-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP84]], i32* [[CONV20]], align 4
+// CHECK9-NEXT:    [[TMP85:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK9-NEXT:    [[TMP86:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP87:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP88:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP83]], i64* [[TMP90]], align 8
+// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP83]], i64* [[TMP92]], align 8
+// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP93]], align 8
+// CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP85]], i64* [[TMP95]], align 8
+// CHECK9-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP85]], i64* [[TMP97]], align 8
+// CHECK9-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP98]], align 8
+// CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to double**
+// CHECK9-NEXT:    store double* [[TMP86]], double** [[TMP100]], align 8
+// CHECK9-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to double**
+// CHECK9-NEXT:    store double* [[TMP86]], double** [[TMP102]], align 8
+// CHECK9-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP103]], align 8
+// CHECK9-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to double**
+// CHECK9-NEXT:    store double* [[TMP87]], double** [[TMP105]], align 8
+// CHECK9-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to double**
+// CHECK9-NEXT:    store double* [[TMP87]], double** [[TMP107]], align 8
+// CHECK9-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP108]], align 8
+// CHECK9-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to double**
+// CHECK9-NEXT:    store double* [[TMP88]], double** [[TMP110]], align 8
+// CHECK9-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to double**
+// CHECK9-NEXT:    store double* [[TMP88]], double** [[TMP112]], align 8
+// CHECK9-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP113]], align 8
+// CHECK9-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP116:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP116]], i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[TMP117:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP117]], 0
+// CHECK9-NEXT:    [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[DIV28]], 1
+// CHECK9-NEXT:    store i32 [[SUB29]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[TMP118:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP118]], 1
+// CHECK9-NEXT:    [[TMP119:%.*]] = zext i32 [[ADD30]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP119]])
+// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP120]], align 4
+// CHECK9-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP121]], align 4
+// CHECK9-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP114]], i8*** [[TMP122]], align 8
+// CHECK9-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP115]], i8*** [[TMP123]], align 8
+// CHECK9-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.8, i32 0, i32 0), i64** [[TMP124]], align 8
+// CHECK9-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.9, i32 0, i32 0), i64** [[TMP125]], align 8
+// CHECK9-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP126]], align 8
+// CHECK9-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP127]], align 8
+// CHECK9-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l446.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]])
+// CHECK9-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK9-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK9:       omp_offload.failed32:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l446(i64 [[TMP83]], i64 [[TMP85]], double* [[TMP86]], double* [[TMP87]], double* [[TMP88]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
+// CHECK9:       omp_offload.cont33:
+// CHECK9-NEXT:    [[TMP130:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP130]], i32* [[CONV35]], align 4
+// CHECK9-NEXT:    [[TMP131:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK9-NEXT:    [[TMP132:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP133:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP134:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP131]], i64* [[TMP136]], align 8
+// CHECK9-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP131]], i64* [[TMP138]], align 8
+// CHECK9-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP139]], align 8
+// CHECK9-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
+// CHECK9-NEXT:    store double* [[TMP132]], double** [[TMP141]], align 8
+// CHECK9-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
+// CHECK9-NEXT:    store double* [[TMP132]], double** [[TMP143]], align 8
+// CHECK9-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP144]], align 8
+// CHECK9-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to double**
+// CHECK9-NEXT:    store double* [[TMP133]], double** [[TMP146]], align 8
+// CHECK9-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to double**
+// CHECK9-NEXT:    store double* [[TMP133]], double** [[TMP148]], align 8
+// CHECK9-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP149]], align 8
+// CHECK9-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to double**
+// CHECK9-NEXT:    store double* [[TMP134]], double** [[TMP151]], align 8
+// CHECK9-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to double**
+// CHECK9-NEXT:    store double* [[TMP134]], double** [[TMP153]], align 8
+// CHECK9-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK9-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP157:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP157]], i32* [[DOTCAPTURE_EXPR_40]], align 4
+// CHECK9-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_40]], align 4
+// CHECK9-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[TMP158]], 0
+// CHECK9-NEXT:    [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1
+// CHECK9-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[DIV43]], 1
+// CHECK9-NEXT:    store i32 [[SUB44]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK9-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK9-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP159]], 1
+// CHECK9-NEXT:    [[TMP160:%.*]] = zext i32 [[ADD45]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP160]])
+// CHECK9-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP161]], align 4
+// CHECK9-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP162]], align 4
+// CHECK9-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP155]], i8*** [[TMP163]], align 8
+// CHECK9-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP156]], i8*** [[TMP164]], align 8
+// CHECK9-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64** [[TMP165]], align 8
+// CHECK9-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i64** [[TMP166]], align 8
+// CHECK9-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP167]], align 8
+// CHECK9-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP168]], align 8
+// CHECK9-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l477.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]])
+// CHECK9-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
+// CHECK9-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK9:       omp_offload.failed47:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l477(i64 [[TMP131]], double* [[TMP132]], double* [[TMP133]], double* [[TMP134]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
+// CHECK9:       omp_offload.cont48:
+// CHECK9-NEXT:    [[TMP171:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV50:%.*]] = bitcast i64* [[CH_CASTED49]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP171]], i32* [[CONV50]], align 4
+// CHECK9-NEXT:    [[TMP172:%.*]] = load i64, i64* [[CH_CASTED49]], align 8
 // CHECK9-NEXT:    [[TMP173:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP173]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK9-NEXT:    [[TMP174:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK9-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP174]], 0
-// CHECK9-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
-// CHECK9-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
-// CHECK9-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK9-NEXT:    [[TMP175:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK9-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP175]], 1
-// CHECK9-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP176]])
-// CHECK9-NEXT:    [[TMP177:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l505.region_id, i32 5, i8** [[TMP171]], i8** [[TMP172]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP178:%.*]] = icmp ne i32 [[TMP177]], 0
-// CHECK9-NEXT:    br i1 [[TMP178]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
-// CHECK9:       omp_offload.failed60:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l505(i64 [[TMP140]], i64 [[TMP142]], double* [[TMP143]], double* [[TMP144]], double* [[TMP145]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
-// CHECK9:       omp_offload.cont61:
-// CHECK9-NEXT:    [[TMP179:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV63:%.*]] = bitcast i64* [[N_CASTED62]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP179]], i32* [[CONV63]], align 4
-// CHECK9-NEXT:    [[TMP180:%.*]] = load i64, i64* [[N_CASTED62]], align 8
-// CHECK9-NEXT:    [[TMP181:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP182:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP183:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP184:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP185:%.*]] = bitcast i8** [[TMP184]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP180]], i64* [[TMP185]], align 8
-// CHECK9-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP187:%.*]] = bitcast i8** [[TMP186]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP180]], i64* [[TMP187]], align 8
-// CHECK9-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP188]], align 8
-// CHECK9-NEXT:    [[TMP189:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP190:%.*]] = bitcast i8** [[TMP189]] to double**
-// CHECK9-NEXT:    store double* [[TMP181]], double** [[TMP190]], align 8
-// CHECK9-NEXT:    [[TMP191:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP192:%.*]] = bitcast i8** [[TMP191]] to double**
-// CHECK9-NEXT:    store double* [[TMP181]], double** [[TMP192]], align 8
-// CHECK9-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP193]], align 8
-// CHECK9-NEXT:    [[TMP194:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP195:%.*]] = bitcast i8** [[TMP194]] to double**
-// CHECK9-NEXT:    store double* [[TMP182]], double** [[TMP195]], align 8
-// CHECK9-NEXT:    [[TMP196:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP197:%.*]] = bitcast i8** [[TMP196]] to double**
-// CHECK9-NEXT:    store double* [[TMP182]], double** [[TMP197]], align 8
-// CHECK9-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP198]], align 8
-// CHECK9-NEXT:    [[TMP199:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP200:%.*]] = bitcast i8** [[TMP199]] to double**
-// CHECK9-NEXT:    store double* [[TMP183]], double** [[TMP200]], align 8
-// CHECK9-NEXT:    [[TMP201:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP202:%.*]] = bitcast i8** [[TMP201]] to double**
-// CHECK9-NEXT:    store double* [[TMP183]], double** [[TMP202]], align 8
-// CHECK9-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP203]], align 8
-// CHECK9-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP205:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP206:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP206]], i32* [[DOTCAPTURE_EXPR_68]], align 4
-// CHECK9-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_68]], align 4
-// CHECK9-NEXT:    [[SUB70:%.*]] = sub nsw i32 [[TMP207]], 0
-// CHECK9-NEXT:    [[DIV71:%.*]] = sdiv i32 [[SUB70]], 1
-// CHECK9-NEXT:    [[SUB72:%.*]] = sub nsw i32 [[DIV71]], 1
-// CHECK9-NEXT:    store i32 [[SUB72]], i32* [[DOTCAPTURE_EXPR_69]], align 4
-// CHECK9-NEXT:    [[TMP208:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_69]], align 4
-// CHECK9-NEXT:    [[ADD73:%.*]] = add nsw i32 [[TMP208]], 1
-// CHECK9-NEXT:    [[TMP209:%.*]] = zext i32 [[ADD73]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP209]])
-// CHECK9-NEXT:    [[TMP210:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l535.region_id, i32 4, i8** [[TMP204]], i8** [[TMP205]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP211:%.*]] = icmp ne i32 [[TMP210]], 0
-// CHECK9-NEXT:    br i1 [[TMP211]], label [[OMP_OFFLOAD_FAILED74:%.*]], label [[OMP_OFFLOAD_CONT75:%.*]]
-// CHECK9:       omp_offload.failed74:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l535(i64 [[TMP180]], double* [[TMP181]], double* [[TMP182]], double* [[TMP183]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT75]]
-// CHECK9:       omp_offload.cont75:
-// CHECK9-NEXT:    [[TMP212:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV77:%.*]] = bitcast i64* [[CH_CASTED76]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP212]], i32* [[CONV77]], align 4
-// CHECK9-NEXT:    [[TMP213:%.*]] = load i64, i64* [[CH_CASTED76]], align 8
-// CHECK9-NEXT:    [[TMP214:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV79:%.*]] = bitcast i64* [[N_CASTED78]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP214]], i32* [[CONV79]], align 4
-// CHECK9-NEXT:    [[TMP215:%.*]] = load i64, i64* [[N_CASTED78]], align 8
-// CHECK9-NEXT:    [[TMP216:%.*]] = load double*, double** [[A]], align 8
-// CHECK9-NEXT:    [[TMP217:%.*]] = load double*, double** [[B]], align 8
-// CHECK9-NEXT:    [[TMP218:%.*]] = load double*, double** [[C]], align 8
-// CHECK9-NEXT:    [[TMP219:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP220:%.*]] = bitcast i8** [[TMP219]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP213]], i64* [[TMP220]], align 8
-// CHECK9-NEXT:    [[TMP221:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP222:%.*]] = bitcast i8** [[TMP221]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP213]], i64* [[TMP222]], align 8
-// CHECK9-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP223]], align 8
-// CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 1
+// CHECK9-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP173]], i32* [[CONV52]], align 4
+// CHECK9-NEXT:    [[TMP174:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK9-NEXT:    [[TMP175:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP176:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP177:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP179:%.*]] = bitcast i8** [[TMP178]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP172]], i64* [[TMP179]], align 8
+// CHECK9-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP181:%.*]] = bitcast i8** [[TMP180]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP172]], i64* [[TMP181]], align 8
+// CHECK9-NEXT:    [[TMP182:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP182]], align 8
+// CHECK9-NEXT:    [[TMP183:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP184:%.*]] = bitcast i8** [[TMP183]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP174]], i64* [[TMP184]], align 8
+// CHECK9-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP186:%.*]] = bitcast i8** [[TMP185]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP174]], i64* [[TMP186]], align 8
+// CHECK9-NEXT:    [[TMP187:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP187]], align 8
+// CHECK9-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP189:%.*]] = bitcast i8** [[TMP188]] to double**
+// CHECK9-NEXT:    store double* [[TMP175]], double** [[TMP189]], align 8
+// CHECK9-NEXT:    [[TMP190:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP191:%.*]] = bitcast i8** [[TMP190]] to double**
+// CHECK9-NEXT:    store double* [[TMP175]], double** [[TMP191]], align 8
+// CHECK9-NEXT:    [[TMP192:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP192]], align 8
+// CHECK9-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP194:%.*]] = bitcast i8** [[TMP193]] to double**
+// CHECK9-NEXT:    store double* [[TMP176]], double** [[TMP194]], align 8
+// CHECK9-NEXT:    [[TMP195:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP196:%.*]] = bitcast i8** [[TMP195]] to double**
+// CHECK9-NEXT:    store double* [[TMP176]], double** [[TMP196]], align 8
+// CHECK9-NEXT:    [[TMP197:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP197]], align 8
+// CHECK9-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP199:%.*]] = bitcast i8** [[TMP198]] to double**
+// CHECK9-NEXT:    store double* [[TMP177]], double** [[TMP199]], align 8
+// CHECK9-NEXT:    [[TMP200:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP201:%.*]] = bitcast i8** [[TMP200]] to double**
+// CHECK9-NEXT:    store double* [[TMP177]], double** [[TMP201]], align 8
+// CHECK9-NEXT:    [[TMP202:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP202]], align 8
+// CHECK9-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP205:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP205]], i32* [[DOTCAPTURE_EXPR_57]], align 4
+// CHECK9-NEXT:    [[TMP206:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_57]], align 4
+// CHECK9-NEXT:    [[SUB59:%.*]] = sub nsw i32 [[TMP206]], 0
+// CHECK9-NEXT:    [[DIV60:%.*]] = sdiv i32 [[SUB59]], 1
+// CHECK9-NEXT:    [[SUB61:%.*]] = sub nsw i32 [[DIV60]], 1
+// CHECK9-NEXT:    store i32 [[SUB61]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK9-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK9-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP207]], 1
+// CHECK9-NEXT:    [[TMP208:%.*]] = zext i32 [[ADD62]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP208]])
+// CHECK9-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP209:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP209]], align 4
+// CHECK9-NEXT:    [[TMP210:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP210]], align 4
+// CHECK9-NEXT:    [[TMP211:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP203]], i8*** [[TMP211]], align 8
+// CHECK9-NEXT:    [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP204]], i8*** [[TMP212]], align 8
+// CHECK9-NEXT:    [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64** [[TMP213]], align 8
+// CHECK9-NEXT:    [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i64** [[TMP214]], align 8
+// CHECK9-NEXT:    [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP215]], align 8
+// CHECK9-NEXT:    [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP216]], align 8
+// CHECK9-NEXT:    [[TMP217:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l505.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]])
+// CHECK9-NEXT:    [[TMP218:%.*]] = icmp ne i32 [[TMP217]], 0
+// CHECK9-NEXT:    br i1 [[TMP218]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK9:       omp_offload.failed64:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l505(i64 [[TMP172]], i64 [[TMP174]], double* [[TMP175]], double* [[TMP176]], double* [[TMP177]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
+// CHECK9:       omp_offload.cont65:
+// CHECK9-NEXT:    [[TMP219:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV67:%.*]] = bitcast i64* [[N_CASTED66]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP219]], i32* [[CONV67]], align 4
+// CHECK9-NEXT:    [[TMP220:%.*]] = load i64, i64* [[N_CASTED66]], align 8
+// CHECK9-NEXT:    [[TMP221:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP222:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP223:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP225:%.*]] = bitcast i8** [[TMP224]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP215]], i64* [[TMP225]], align 8
-// CHECK9-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 1
+// CHECK9-NEXT:    store i64 [[TMP220]], i64* [[TMP225]], align 8
+// CHECK9-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP227:%.*]] = bitcast i8** [[TMP226]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP215]], i64* [[TMP227]], align 8
-// CHECK9-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 1
+// CHECK9-NEXT:    store i64 [[TMP220]], i64* [[TMP227]], align 8
+// CHECK9-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP228]], align 8
-// CHECK9-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP230:%.*]] = bitcast i8** [[TMP229]] to double**
-// CHECK9-NEXT:    store double* [[TMP216]], double** [[TMP230]], align 8
-// CHECK9-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 2
+// CHECK9-NEXT:    store double* [[TMP221]], double** [[TMP230]], align 8
+// CHECK9-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP232:%.*]] = bitcast i8** [[TMP231]] to double**
-// CHECK9-NEXT:    store double* [[TMP216]], double** [[TMP232]], align 8
-// CHECK9-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 2
+// CHECK9-NEXT:    store double* [[TMP221]], double** [[TMP232]], align 8
+// CHECK9-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 1
 // CHECK9-NEXT:    store i8* null, i8** [[TMP233]], align 8
-// CHECK9-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP235:%.*]] = bitcast i8** [[TMP234]] to double**
-// CHECK9-NEXT:    store double* [[TMP217]], double** [[TMP235]], align 8
-// CHECK9-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 3
+// CHECK9-NEXT:    store double* [[TMP222]], double** [[TMP235]], align 8
+// CHECK9-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP237:%.*]] = bitcast i8** [[TMP236]] to double**
-// CHECK9-NEXT:    store double* [[TMP217]], double** [[TMP237]], align 8
-// CHECK9-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 3
+// CHECK9-NEXT:    store double* [[TMP222]], double** [[TMP237]], align 8
+// CHECK9-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 2
 // CHECK9-NEXT:    store i8* null, i8** [[TMP238]], align 8
-// CHECK9-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP240:%.*]] = bitcast i8** [[TMP239]] to double**
-// CHECK9-NEXT:    store double* [[TMP218]], double** [[TMP240]], align 8
-// CHECK9-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 4
+// CHECK9-NEXT:    store double* [[TMP223]], double** [[TMP240]], align 8
+// CHECK9-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP242:%.*]] = bitcast i8** [[TMP241]] to double**
-// CHECK9-NEXT:    store double* [[TMP218]], double** [[TMP242]], align 8
-// CHECK9-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 4
+// CHECK9-NEXT:    store double* [[TMP223]], double** [[TMP242]], align 8
+// CHECK9-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 3
 // CHECK9-NEXT:    store i8* null, i8** [[TMP243]], align 8
-// CHECK9-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP246:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_84]], align 4
-// CHECK9-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_84]], align 4
-// CHECK9-NEXT:    [[SUB86:%.*]] = sub nsw i32 [[TMP247]], 0
-// CHECK9-NEXT:    [[DIV87:%.*]] = sdiv i32 [[SUB86]], 1
-// CHECK9-NEXT:    [[SUB88:%.*]] = sub nsw i32 [[DIV87]], 1
-// CHECK9-NEXT:    store i32 [[SUB88]], i32* [[DOTCAPTURE_EXPR_85]], align 4
-// CHECK9-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_85]], align 4
-// CHECK9-NEXT:    [[ADD89:%.*]] = add nsw i32 [[TMP248]], 1
-// CHECK9-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD89]] to i64
+// CHECK9-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_72]], align 4
+// CHECK9-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_72]], align 4
+// CHECK9-NEXT:    [[SUB74:%.*]] = sub nsw i32 [[TMP247]], 0
+// CHECK9-NEXT:    [[DIV75:%.*]] = sdiv i32 [[SUB74]], 1
+// CHECK9-NEXT:    [[SUB76:%.*]] = sub nsw i32 [[DIV75]], 1
+// CHECK9-NEXT:    store i32 [[SUB76]], i32* [[DOTCAPTURE_EXPR_73]], align 4
+// CHECK9-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_73]], align 4
+// CHECK9-NEXT:    [[ADD77:%.*]] = add nsw i32 [[TMP248]], 1
+// CHECK9-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD77]] to i64
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP249]])
-// CHECK9-NEXT:    [[TMP250:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l561.region_id, i32 5, i8** [[TMP244]], i8** [[TMP245]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP251:%.*]] = icmp ne i32 [[TMP250]], 0
-// CHECK9-NEXT:    br i1 [[TMP251]], label [[OMP_OFFLOAD_FAILED90:%.*]], label [[OMP_OFFLOAD_CONT91:%.*]]
-// CHECK9:       omp_offload.failed90:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l561(i64 [[TMP213]], i64 [[TMP215]], double* [[TMP216]], double* [[TMP217]], double* [[TMP218]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT91]]
-// CHECK9:       omp_offload.cont91:
+// CHECK9-NEXT:    [[KERNEL_ARGS78:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP250:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP250]], align 4
+// CHECK9-NEXT:    [[TMP251:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP251]], align 4
+// CHECK9-NEXT:    [[TMP252:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP244]], i8*** [[TMP252]], align 8
+// CHECK9-NEXT:    [[TMP253:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP245]], i8*** [[TMP253]], align 8
+// CHECK9-NEXT:    [[TMP254:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.20, i32 0, i32 0), i64** [[TMP254]], align 8
+// CHECK9-NEXT:    [[TMP255:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.21, i32 0, i32 0), i64** [[TMP255]], align 8
+// CHECK9-NEXT:    [[TMP256:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP256]], align 8
+// CHECK9-NEXT:    [[TMP257:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP257]], align 8
+// CHECK9-NEXT:    [[TMP258:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l535.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]])
+// CHECK9-NEXT:    [[TMP259:%.*]] = icmp ne i32 [[TMP258]], 0
+// CHECK9-NEXT:    br i1 [[TMP259]], label [[OMP_OFFLOAD_FAILED79:%.*]], label [[OMP_OFFLOAD_CONT80:%.*]]
+// CHECK9:       omp_offload.failed79:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l535(i64 [[TMP220]], double* [[TMP221]], double* [[TMP222]], double* [[TMP223]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT80]]
+// CHECK9:       omp_offload.cont80:
+// CHECK9-NEXT:    [[TMP260:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV82:%.*]] = bitcast i64* [[CH_CASTED81]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP260]], i32* [[CONV82]], align 4
+// CHECK9-NEXT:    [[TMP261:%.*]] = load i64, i64* [[CH_CASTED81]], align 8
+// CHECK9-NEXT:    [[TMP262:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV84:%.*]] = bitcast i64* [[N_CASTED83]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP262]], i32* [[CONV84]], align 4
+// CHECK9-NEXT:    [[TMP263:%.*]] = load i64, i64* [[N_CASTED83]], align 8
+// CHECK9-NEXT:    [[TMP264:%.*]] = load double*, double** [[A]], align 8
+// CHECK9-NEXT:    [[TMP265:%.*]] = load double*, double** [[B]], align 8
+// CHECK9-NEXT:    [[TMP266:%.*]] = load double*, double** [[C]], align 8
+// CHECK9-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP268:%.*]] = bitcast i8** [[TMP267]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP261]], i64* [[TMP268]], align 8
+// CHECK9-NEXT:    [[TMP269:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP270:%.*]] = bitcast i8** [[TMP269]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP261]], i64* [[TMP270]], align 8
+// CHECK9-NEXT:    [[TMP271:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP271]], align 8
+// CHECK9-NEXT:    [[TMP272:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP273:%.*]] = bitcast i8** [[TMP272]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP263]], i64* [[TMP273]], align 8
+// CHECK9-NEXT:    [[TMP274:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP275:%.*]] = bitcast i8** [[TMP274]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP263]], i64* [[TMP275]], align 8
+// CHECK9-NEXT:    [[TMP276:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP276]], align 8
+// CHECK9-NEXT:    [[TMP277:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP278:%.*]] = bitcast i8** [[TMP277]] to double**
+// CHECK9-NEXT:    store double* [[TMP264]], double** [[TMP278]], align 8
+// CHECK9-NEXT:    [[TMP279:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP280:%.*]] = bitcast i8** [[TMP279]] to double**
+// CHECK9-NEXT:    store double* [[TMP264]], double** [[TMP280]], align 8
+// CHECK9-NEXT:    [[TMP281:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP281]], align 8
+// CHECK9-NEXT:    [[TMP282:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP283:%.*]] = bitcast i8** [[TMP282]] to double**
+// CHECK9-NEXT:    store double* [[TMP265]], double** [[TMP283]], align 8
+// CHECK9-NEXT:    [[TMP284:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP285:%.*]] = bitcast i8** [[TMP284]] to double**
+// CHECK9-NEXT:    store double* [[TMP265]], double** [[TMP285]], align 8
+// CHECK9-NEXT:    [[TMP286:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP286]], align 8
+// CHECK9-NEXT:    [[TMP287:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP288:%.*]] = bitcast i8** [[TMP287]] to double**
+// CHECK9-NEXT:    store double* [[TMP266]], double** [[TMP288]], align 8
+// CHECK9-NEXT:    [[TMP289:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP290:%.*]] = bitcast i8** [[TMP289]] to double**
+// CHECK9-NEXT:    store double* [[TMP266]], double** [[TMP290]], align 8
+// CHECK9-NEXT:    [[TMP291:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP291]], align 8
+// CHECK9-NEXT:    [[TMP292:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP293:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP294:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP294]], i32* [[DOTCAPTURE_EXPR_89]], align 4
+// CHECK9-NEXT:    [[TMP295:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_89]], align 4
+// CHECK9-NEXT:    [[SUB91:%.*]] = sub nsw i32 [[TMP295]], 0
+// CHECK9-NEXT:    [[DIV92:%.*]] = sdiv i32 [[SUB91]], 1
+// CHECK9-NEXT:    [[SUB93:%.*]] = sub nsw i32 [[DIV92]], 1
+// CHECK9-NEXT:    store i32 [[SUB93]], i32* [[DOTCAPTURE_EXPR_90]], align 4
+// CHECK9-NEXT:    [[TMP296:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_90]], align 4
+// CHECK9-NEXT:    [[ADD94:%.*]] = add nsw i32 [[TMP296]], 1
+// CHECK9-NEXT:    [[TMP297:%.*]] = zext i32 [[ADD94]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP297]])
+// CHECK9-NEXT:    [[KERNEL_ARGS95:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP298:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP298]], align 4
+// CHECK9-NEXT:    [[TMP299:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP299]], align 4
+// CHECK9-NEXT:    [[TMP300:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP292]], i8*** [[TMP300]], align 8
+// CHECK9-NEXT:    [[TMP301:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP293]], i8*** [[TMP301]], align 8
+// CHECK9-NEXT:    [[TMP302:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.24, i32 0, i32 0), i64** [[TMP302]], align 8
+// CHECK9-NEXT:    [[TMP303:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i64** [[TMP303]], align 8
+// CHECK9-NEXT:    [[TMP304:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP304]], align 8
+// CHECK9-NEXT:    [[TMP305:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP305]], align 8
+// CHECK9-NEXT:    [[TMP306:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l561.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]])
+// CHECK9-NEXT:    [[TMP307:%.*]] = icmp ne i32 [[TMP306]], 0
+// CHECK9-NEXT:    br i1 [[TMP307]], label [[OMP_OFFLOAD_FAILED96:%.*]], label [[OMP_OFFLOAD_CONT97:%.*]]
+// CHECK9:       omp_offload.failed96:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l561(i64 [[TMP261]], i64 [[TMP263]], double* [[TMP264]], double* [[TMP265]], double* [[TMP266]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT97]]
+// CHECK9:       omp_offload.cont97:
 // CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
 // CHECK9-NEXT:    ret i32 [[CALL]]
 //
@@ -7072,43 +7191,43 @@ int main() {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[CH_CASTED:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS21:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS21:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS22:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS23:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[N_CASTED32:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS36:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP37:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_38:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[CH_CASTED46:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED48:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS51:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS52:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP53:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_54:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[N_CASTED62:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS64:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS65:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS66:%.*]] = alloca [4 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP67:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_68:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_69:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[CH_CASTED76:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[N_CASTED78:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS80:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS81:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS82:%.*]] = alloca [5 x i8*], align 8
-// CHECK9-NEXT:    [[_TMP83:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_84:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_85:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED34:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS36:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS37:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS38:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[CH_CASTED49:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[N_CASTED51:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS53:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS54:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS55:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[N_CASTED66:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS68:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS69:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS70:%.*]] = alloca [4 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP71:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_72:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_73:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[CH_CASTED81:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[N_CASTED83:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS85:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_PTRS86:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS87:%.*]] = alloca [5 x i8*], align 8
+// CHECK9-NEXT:    [[_TMP88:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_89:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[DOTCAPTURE_EXPR_90:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 10000, i32* [[N]], align 4
 // CHECK9-NEXT:    store i32 100, i32* [[CH]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N]], align 4
@@ -7163,403 +7282,522 @@ int main() {
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
 // CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42.region_id, i32 4, i8** [[TMP25]], i8** [[TMP26]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP31]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP32]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP25]], i8*** [[TMP33]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP26]], i8*** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.28, i32 0, i32 0), i64** [[TMP35]], align 8
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.29, i32 0, i32 0), i64** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK9-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
+// CHECK9-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42(i64 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]], i32* [[TMP4]]) #[[ATTR2]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP37:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP42]], align 8
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP35]], i32** [[TMP44]], align 8
-// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP35]], i32** [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP36]], i32** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP36]], i32** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 3
+// CHECK9-NEXT:    store i32 [[TMP41]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP44:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP42]], i64* [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP42]], i64* [[TMP49]], align 8
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP43]], i32** [[TMP52]], align 8
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP37]], i32** [[TMP54]], align 8
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP37]], i32** [[TMP56]], align 8
-// CHECK9-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP57]], align 8
-// CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP60:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP60]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK9-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP61]], 0
+// CHECK9-NEXT:    store i32* [[TMP43]], i32** [[TMP54]], align 8
+// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP55]], align 8
+// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP44]], i32** [[TMP57]], align 8
+// CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP44]], i32** [[TMP59]], align 8
+// CHECK9-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP60]], align 8
+// CHECK9-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP45]], i32** [[TMP62]], align 8
+// CHECK9-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP45]], i32** [[TMP64]], align 8
+// CHECK9-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP65]], align 8
+// CHECK9-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP68]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK9-NEXT:    [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK9-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP69]], 0
 // CHECK9-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK9-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK9-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1
-// CHECK9-NEXT:    [[TMP63:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP63]])
-// CHECK9-NEXT:    [[TMP64:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50.region_id, i32 4, i8** [[TMP58]], i8** [[TMP59]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP65:%.*]] = icmp ne i32 [[TMP64]], 0
-// CHECK9-NEXT:    br i1 [[TMP65]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
-// CHECK9:       omp_offload.failed15:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50(i64 [[TMP34]], i32* [[TMP35]], i32* [[TMP36]], i32* [[TMP37]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
-// CHECK9:       omp_offload.cont16:
-// CHECK9-NEXT:    [[TMP66:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV17:%.*]] = bitcast i64* [[CH_CASTED]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP66]], i32* [[CONV17]], align 4
-// CHECK9-NEXT:    [[TMP67:%.*]] = load i64, i64* [[CH_CASTED]], align 8
-// CHECK9-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV19:%.*]] = bitcast i64* [[N_CASTED18]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP68]], i32* [[CONV19]], align 4
-// CHECK9-NEXT:    [[TMP69:%.*]] = load i64, i64* [[N_CASTED18]], align 8
-// CHECK9-NEXT:    [[TMP70:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP71:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP72:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP74]], align 8
-// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP76]], align 8
-// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP77]], align 8
-// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP69]], i64* [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP69]], i64* [[TMP81]], align 8
-// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP82]], align 8
-// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP70]], i32** [[TMP84]], align 8
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP70]], i32** [[TMP86]], align 8
-// CHECK9-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP87]], align 8
-// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP71]], i32** [[TMP89]], align 8
-// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP71]], i32** [[TMP91]], align 8
-// CHECK9-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP72]], i32** [[TMP94]], align 8
-// CHECK9-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP72]], i32** [[TMP96]], align 8
-// CHECK9-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP97]], align 8
-// CHECK9-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP100]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK9-NEXT:    [[TMP101:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK9-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP101]], 0
-// CHECK9-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
-// CHECK9-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
-// CHECK9-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[TMP102:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP102]], 1
-// CHECK9-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP103]])
-// CHECK9-NEXT:    [[TMP104:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l58.region_id, i32 5, i8** [[TMP98]], i8** [[TMP99]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP105:%.*]] = icmp ne i32 [[TMP104]], 0
-// CHECK9-NEXT:    br i1 [[TMP105]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
-// CHECK9:       omp_offload.failed30:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l58(i64 [[TMP67]], i64 [[TMP69]], i32* [[TMP70]], i32* [[TMP71]], i32* [[TMP72]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
-// CHECK9:       omp_offload.cont31:
-// CHECK9-NEXT:    [[TMP106:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV33:%.*]] = bitcast i64* [[N_CASTED32]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP106]], i32* [[CONV33]], align 4
-// CHECK9-NEXT:    [[TMP107:%.*]] = load i64, i64* [[N_CASTED32]], align 8
-// CHECK9-NEXT:    [[TMP108:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP109:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP110:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP107]], i64* [[TMP112]], align 8
-// CHECK9-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP107]], i64* [[TMP114]], align 8
-// CHECK9-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK9-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP108]], i32** [[TMP117]], align 8
-// CHECK9-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP108]], i32** [[TMP119]], align 8
-// CHECK9-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP120]], align 8
-// CHECK9-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP109]], i32** [[TMP122]], align 8
-// CHECK9-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP109]], i32** [[TMP124]], align 8
-// CHECK9-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP125]], align 8
-// CHECK9-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP110]], i32** [[TMP127]], align 8
-// CHECK9-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP110]], i32** [[TMP129]], align 8
-// CHECK9-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK9-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK9-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK9-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP134]], 0
-// CHECK9-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
-// CHECK9-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
-// CHECK9-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK9-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK9-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP135]], 1
-// CHECK9-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP136]])
-// CHECK9-NEXT:    [[TMP137:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l66.region_id, i32 4, i8** [[TMP131]], i8** [[TMP132]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.40, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.41, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP138:%.*]] = icmp ne i32 [[TMP137]], 0
-// CHECK9-NEXT:    br i1 [[TMP138]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
-// CHECK9:       omp_offload.failed44:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l66(i64 [[TMP107]], i32* [[TMP108]], i32* [[TMP109]], i32* [[TMP110]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
-// CHECK9:       omp_offload.cont45:
-// CHECK9-NEXT:    [[TMP139:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV47:%.*]] = bitcast i64* [[CH_CASTED46]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP139]], i32* [[CONV47]], align 4
-// CHECK9-NEXT:    [[TMP140:%.*]] = load i64, i64* [[CH_CASTED46]], align 8
-// CHECK9-NEXT:    [[TMP141:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV49:%.*]] = bitcast i64* [[N_CASTED48]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP141]], i32* [[CONV49]], align 4
-// CHECK9-NEXT:    [[TMP142:%.*]] = load i64, i64* [[N_CASTED48]], align 8
-// CHECK9-NEXT:    [[TMP143:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP144:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP145:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP140]], i64* [[TMP147]], align 8
-// CHECK9-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP140]], i64* [[TMP149]], align 8
-// CHECK9-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP150]], align 8
-// CHECK9-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP142]], i64* [[TMP152]], align 8
-// CHECK9-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP142]], i64* [[TMP154]], align 8
-// CHECK9-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK9-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP143]], i32** [[TMP157]], align 8
-// CHECK9-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP143]], i32** [[TMP159]], align 8
-// CHECK9-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP160]], align 8
-// CHECK9-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP144]], i32** [[TMP162]], align 8
-// CHECK9-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP144]], i32** [[TMP164]], align 8
-// CHECK9-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP165]], align 8
-// CHECK9-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP167:%.*]] = bitcast i8** [[TMP166]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP145]], i32** [[TMP167]], align 8
-// CHECK9-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP169:%.*]] = bitcast i8** [[TMP168]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP145]], i32** [[TMP169]], align 8
-// CHECK9-NEXT:    [[TMP170:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS52]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP170]], align 8
-// CHECK9-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP70]], 1
+// CHECK9-NEXT:    [[TMP71:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP71]])
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP72]], align 4
+// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP73]], align 4
+// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP66]], i8*** [[TMP74]], align 8
+// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP67]], i8*** [[TMP75]], align 8
+// CHECK9-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.32, i32 0, i32 0), i64** [[TMP76]], align 8
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.33, i32 0, i32 0), i64** [[TMP77]], align 8
+// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP78]], align 8
+// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP79]], align 8
+// CHECK9-NEXT:    [[TMP80:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS15]])
+// CHECK9-NEXT:    [[TMP81:%.*]] = icmp ne i32 [[TMP80]], 0
+// CHECK9-NEXT:    br i1 [[TMP81]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9:       omp_offload.failed16:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50(i64 [[TMP42]], i32* [[TMP43]], i32* [[TMP44]], i32* [[TMP45]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
+// CHECK9:       omp_offload.cont17:
+// CHECK9-NEXT:    [[TMP82:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV18:%.*]] = bitcast i64* [[CH_CASTED]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP82]], i32* [[CONV18]], align 4
+// CHECK9-NEXT:    [[TMP83:%.*]] = load i64, i64* [[CH_CASTED]], align 8
+// CHECK9-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP84]], i32* [[CONV20]], align 4
+// CHECK9-NEXT:    [[TMP85:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK9-NEXT:    [[TMP86:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP87:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP88:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP83]], i64* [[TMP90]], align 8
+// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP83]], i64* [[TMP92]], align 8
+// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP93]], align 8
+// CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP85]], i64* [[TMP95]], align 8
+// CHECK9-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP85]], i64* [[TMP97]], align 8
+// CHECK9-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP98]], align 8
+// CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP86]], i32** [[TMP100]], align 8
+// CHECK9-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP86]], i32** [[TMP102]], align 8
+// CHECK9-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP103]], align 8
+// CHECK9-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP87]], i32** [[TMP105]], align 8
+// CHECK9-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP87]], i32** [[TMP107]], align 8
+// CHECK9-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP108]], align 8
+// CHECK9-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP88]], i32** [[TMP110]], align 8
+// CHECK9-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP88]], i32** [[TMP112]], align 8
+// CHECK9-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP113]], align 8
+// CHECK9-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP116:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP116]], i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[TMP117:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP117]], 0
+// CHECK9-NEXT:    [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
+// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[DIV28]], 1
+// CHECK9-NEXT:    store i32 [[SUB29]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[TMP118:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP118]], 1
+// CHECK9-NEXT:    [[TMP119:%.*]] = zext i32 [[ADD30]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP119]])
+// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP120]], align 4
+// CHECK9-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP121]], align 4
+// CHECK9-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP114]], i8*** [[TMP122]], align 8
+// CHECK9-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP115]], i8*** [[TMP123]], align 8
+// CHECK9-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.36, i32 0, i32 0), i64** [[TMP124]], align 8
+// CHECK9-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.37, i32 0, i32 0), i64** [[TMP125]], align 8
+// CHECK9-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP126]], align 8
+// CHECK9-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP127]], align 8
+// CHECK9-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l58.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS31]])
+// CHECK9-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK9-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK9:       omp_offload.failed32:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l58(i64 [[TMP83]], i64 [[TMP85]], i32* [[TMP86]], i32* [[TMP87]], i32* [[TMP88]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
+// CHECK9:       omp_offload.cont33:
+// CHECK9-NEXT:    [[TMP130:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP130]], i32* [[CONV35]], align 4
+// CHECK9-NEXT:    [[TMP131:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK9-NEXT:    [[TMP132:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP133:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP134:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP131]], i64* [[TMP136]], align 8
+// CHECK9-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP131]], i64* [[TMP138]], align 8
+// CHECK9-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP139]], align 8
+// CHECK9-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP132]], i32** [[TMP141]], align 8
+// CHECK9-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP132]], i32** [[TMP143]], align 8
+// CHECK9-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP144]], align 8
+// CHECK9-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP133]], i32** [[TMP146]], align 8
+// CHECK9-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP133]], i32** [[TMP148]], align 8
+// CHECK9-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP149]], align 8
+// CHECK9-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP134]], i32** [[TMP151]], align 8
+// CHECK9-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP134]], i32** [[TMP153]], align 8
+// CHECK9-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK9-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP157:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP157]], i32* [[DOTCAPTURE_EXPR_40]], align 4
+// CHECK9-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_40]], align 4
+// CHECK9-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[TMP158]], 0
+// CHECK9-NEXT:    [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1
+// CHECK9-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[DIV43]], 1
+// CHECK9-NEXT:    store i32 [[SUB44]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK9-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK9-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP159]], 1
+// CHECK9-NEXT:    [[TMP160:%.*]] = zext i32 [[ADD45]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP160]])
+// CHECK9-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP161]], align 4
+// CHECK9-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP162]], align 4
+// CHECK9-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP155]], i8*** [[TMP163]], align 8
+// CHECK9-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP156]], i8*** [[TMP164]], align 8
+// CHECK9-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.40, i32 0, i32 0), i64** [[TMP165]], align 8
+// CHECK9-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.41, i32 0, i32 0), i64** [[TMP166]], align 8
+// CHECK9-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP167]], align 8
+// CHECK9-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP168]], align 8
+// CHECK9-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l66.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS46]])
+// CHECK9-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
+// CHECK9-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK9:       omp_offload.failed47:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l66(i64 [[TMP131]], i32* [[TMP132]], i32* [[TMP133]], i32* [[TMP134]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
+// CHECK9:       omp_offload.cont48:
+// CHECK9-NEXT:    [[TMP171:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV50:%.*]] = bitcast i64* [[CH_CASTED49]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP171]], i32* [[CONV50]], align 4
+// CHECK9-NEXT:    [[TMP172:%.*]] = load i64, i64* [[CH_CASTED49]], align 8
 // CHECK9-NEXT:    [[TMP173:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP173]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK9-NEXT:    [[TMP174:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK9-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP174]], 0
-// CHECK9-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
-// CHECK9-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
-// CHECK9-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK9-NEXT:    [[TMP175:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK9-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP175]], 1
-// CHECK9-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP176]])
-// CHECK9-NEXT:    [[TMP177:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l74.region_id, i32 5, i8** [[TMP171]], i8** [[TMP172]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.44, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.45, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP178:%.*]] = icmp ne i32 [[TMP177]], 0
-// CHECK9-NEXT:    br i1 [[TMP178]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
-// CHECK9:       omp_offload.failed60:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l74(i64 [[TMP140]], i64 [[TMP142]], i32* [[TMP143]], i32* [[TMP144]], i32* [[TMP145]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
-// CHECK9:       omp_offload.cont61:
-// CHECK9-NEXT:    [[TMP179:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV63:%.*]] = bitcast i64* [[N_CASTED62]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP179]], i32* [[CONV63]], align 4
-// CHECK9-NEXT:    [[TMP180:%.*]] = load i64, i64* [[N_CASTED62]], align 8
-// CHECK9-NEXT:    [[TMP181:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP182:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP183:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP184:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP185:%.*]] = bitcast i8** [[TMP184]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP180]], i64* [[TMP185]], align 8
-// CHECK9-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP187:%.*]] = bitcast i8** [[TMP186]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP180]], i64* [[TMP187]], align 8
-// CHECK9-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP188]], align 8
-// CHECK9-NEXT:    [[TMP189:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP190:%.*]] = bitcast i8** [[TMP189]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP181]], i32** [[TMP190]], align 8
-// CHECK9-NEXT:    [[TMP191:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP192:%.*]] = bitcast i8** [[TMP191]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP181]], i32** [[TMP192]], align 8
-// CHECK9-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP193]], align 8
-// CHECK9-NEXT:    [[TMP194:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP195:%.*]] = bitcast i8** [[TMP194]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP182]], i32** [[TMP195]], align 8
-// CHECK9-NEXT:    [[TMP196:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP197:%.*]] = bitcast i8** [[TMP196]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP182]], i32** [[TMP197]], align 8
-// CHECK9-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP198]], align 8
-// CHECK9-NEXT:    [[TMP199:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP200:%.*]] = bitcast i8** [[TMP199]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP183]], i32** [[TMP200]], align 8
-// CHECK9-NEXT:    [[TMP201:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP202:%.*]] = bitcast i8** [[TMP201]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP183]], i32** [[TMP202]], align 8
-// CHECK9-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS66]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP203]], align 8
-// CHECK9-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS64]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP205:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS65]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP206:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP206]], i32* [[DOTCAPTURE_EXPR_68]], align 4
-// CHECK9-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_68]], align 4
-// CHECK9-NEXT:    [[SUB70:%.*]] = sub nsw i32 [[TMP207]], 0
-// CHECK9-NEXT:    [[DIV71:%.*]] = sdiv i32 [[SUB70]], 1
-// CHECK9-NEXT:    [[SUB72:%.*]] = sub nsw i32 [[DIV71]], 1
-// CHECK9-NEXT:    store i32 [[SUB72]], i32* [[DOTCAPTURE_EXPR_69]], align 4
-// CHECK9-NEXT:    [[TMP208:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_69]], align 4
-// CHECK9-NEXT:    [[ADD73:%.*]] = add nsw i32 [[TMP208]], 1
-// CHECK9-NEXT:    [[TMP209:%.*]] = zext i32 [[ADD73]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP209]])
-// CHECK9-NEXT:    [[TMP210:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l82.region_id, i32 4, i8** [[TMP204]], i8** [[TMP205]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.48, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.49, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP211:%.*]] = icmp ne i32 [[TMP210]], 0
-// CHECK9-NEXT:    br i1 [[TMP211]], label [[OMP_OFFLOAD_FAILED74:%.*]], label [[OMP_OFFLOAD_CONT75:%.*]]
-// CHECK9:       omp_offload.failed74:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l82(i64 [[TMP180]], i32* [[TMP181]], i32* [[TMP182]], i32* [[TMP183]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT75]]
-// CHECK9:       omp_offload.cont75:
-// CHECK9-NEXT:    [[TMP212:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK9-NEXT:    [[CONV77:%.*]] = bitcast i64* [[CH_CASTED76]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP212]], i32* [[CONV77]], align 4
-// CHECK9-NEXT:    [[TMP213:%.*]] = load i64, i64* [[CH_CASTED76]], align 8
-// CHECK9-NEXT:    [[TMP214:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    [[CONV79:%.*]] = bitcast i64* [[N_CASTED78]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP214]], i32* [[CONV79]], align 4
-// CHECK9-NEXT:    [[TMP215:%.*]] = load i64, i64* [[N_CASTED78]], align 8
-// CHECK9-NEXT:    [[TMP216:%.*]] = load i32*, i32** [[A]], align 8
-// CHECK9-NEXT:    [[TMP217:%.*]] = load i32*, i32** [[B]], align 8
-// CHECK9-NEXT:    [[TMP218:%.*]] = load i32*, i32** [[C]], align 8
-// CHECK9-NEXT:    [[TMP219:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP220:%.*]] = bitcast i8** [[TMP219]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP213]], i64* [[TMP220]], align 8
-// CHECK9-NEXT:    [[TMP221:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP222:%.*]] = bitcast i8** [[TMP221]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP213]], i64* [[TMP222]], align 8
-// CHECK9-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP223]], align 8
-// CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 1
+// CHECK9-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP173]], i32* [[CONV52]], align 4
+// CHECK9-NEXT:    [[TMP174:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK9-NEXT:    [[TMP175:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP176:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP177:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP179:%.*]] = bitcast i8** [[TMP178]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP172]], i64* [[TMP179]], align 8
+// CHECK9-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP181:%.*]] = bitcast i8** [[TMP180]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP172]], i64* [[TMP181]], align 8
+// CHECK9-NEXT:    [[TMP182:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP182]], align 8
+// CHECK9-NEXT:    [[TMP183:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP184:%.*]] = bitcast i8** [[TMP183]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP174]], i64* [[TMP184]], align 8
+// CHECK9-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP186:%.*]] = bitcast i8** [[TMP185]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP174]], i64* [[TMP186]], align 8
+// CHECK9-NEXT:    [[TMP187:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP187]], align 8
+// CHECK9-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP189:%.*]] = bitcast i8** [[TMP188]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP175]], i32** [[TMP189]], align 8
+// CHECK9-NEXT:    [[TMP190:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP191:%.*]] = bitcast i8** [[TMP190]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP175]], i32** [[TMP191]], align 8
+// CHECK9-NEXT:    [[TMP192:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP192]], align 8
+// CHECK9-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP194:%.*]] = bitcast i8** [[TMP193]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP176]], i32** [[TMP194]], align 8
+// CHECK9-NEXT:    [[TMP195:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP196:%.*]] = bitcast i8** [[TMP195]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP176]], i32** [[TMP196]], align 8
+// CHECK9-NEXT:    [[TMP197:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP197]], align 8
+// CHECK9-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP199:%.*]] = bitcast i8** [[TMP198]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP177]], i32** [[TMP199]], align 8
+// CHECK9-NEXT:    [[TMP200:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP201:%.*]] = bitcast i8** [[TMP200]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP177]], i32** [[TMP201]], align 8
+// CHECK9-NEXT:    [[TMP202:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP202]], align 8
+// CHECK9-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP205:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP205]], i32* [[DOTCAPTURE_EXPR_57]], align 4
+// CHECK9-NEXT:    [[TMP206:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_57]], align 4
+// CHECK9-NEXT:    [[SUB59:%.*]] = sub nsw i32 [[TMP206]], 0
+// CHECK9-NEXT:    [[DIV60:%.*]] = sdiv i32 [[SUB59]], 1
+// CHECK9-NEXT:    [[SUB61:%.*]] = sub nsw i32 [[DIV60]], 1
+// CHECK9-NEXT:    store i32 [[SUB61]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK9-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK9-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP207]], 1
+// CHECK9-NEXT:    [[TMP208:%.*]] = zext i32 [[ADD62]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP208]])
+// CHECK9-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP209:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP209]], align 4
+// CHECK9-NEXT:    [[TMP210:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP210]], align 4
+// CHECK9-NEXT:    [[TMP211:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP203]], i8*** [[TMP211]], align 8
+// CHECK9-NEXT:    [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP204]], i8*** [[TMP212]], align 8
+// CHECK9-NEXT:    [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.44, i32 0, i32 0), i64** [[TMP213]], align 8
+// CHECK9-NEXT:    [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.45, i32 0, i32 0), i64** [[TMP214]], align 8
+// CHECK9-NEXT:    [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP215]], align 8
+// CHECK9-NEXT:    [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP216]], align 8
+// CHECK9-NEXT:    [[TMP217:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l74.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS63]])
+// CHECK9-NEXT:    [[TMP218:%.*]] = icmp ne i32 [[TMP217]], 0
+// CHECK9-NEXT:    br i1 [[TMP218]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK9:       omp_offload.failed64:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l74(i64 [[TMP172]], i64 [[TMP174]], i32* [[TMP175]], i32* [[TMP176]], i32* [[TMP177]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
+// CHECK9:       omp_offload.cont65:
+// CHECK9-NEXT:    [[TMP219:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV67:%.*]] = bitcast i64* [[N_CASTED66]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP219]], i32* [[CONV67]], align 4
+// CHECK9-NEXT:    [[TMP220:%.*]] = load i64, i64* [[N_CASTED66]], align 8
+// CHECK9-NEXT:    [[TMP221:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP222:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP223:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP225:%.*]] = bitcast i8** [[TMP224]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP215]], i64* [[TMP225]], align 8
-// CHECK9-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 1
+// CHECK9-NEXT:    store i64 [[TMP220]], i64* [[TMP225]], align 8
+// CHECK9-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP227:%.*]] = bitcast i8** [[TMP226]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP215]], i64* [[TMP227]], align 8
-// CHECK9-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 1
+// CHECK9-NEXT:    store i64 [[TMP220]], i64* [[TMP227]], align 8
+// CHECK9-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP228]], align 8
-// CHECK9-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP230:%.*]] = bitcast i8** [[TMP229]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP216]], i32** [[TMP230]], align 8
-// CHECK9-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 2
+// CHECK9-NEXT:    store i32* [[TMP221]], i32** [[TMP230]], align 8
+// CHECK9-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP232:%.*]] = bitcast i8** [[TMP231]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP216]], i32** [[TMP232]], align 8
-// CHECK9-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 2
+// CHECK9-NEXT:    store i32* [[TMP221]], i32** [[TMP232]], align 8
+// CHECK9-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 1
 // CHECK9-NEXT:    store i8* null, i8** [[TMP233]], align 8
-// CHECK9-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP235:%.*]] = bitcast i8** [[TMP234]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP217]], i32** [[TMP235]], align 8
-// CHECK9-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 3
+// CHECK9-NEXT:    store i32* [[TMP222]], i32** [[TMP235]], align 8
+// CHECK9-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP237:%.*]] = bitcast i8** [[TMP236]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP217]], i32** [[TMP237]], align 8
-// CHECK9-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 3
+// CHECK9-NEXT:    store i32* [[TMP222]], i32** [[TMP237]], align 8
+// CHECK9-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 2
 // CHECK9-NEXT:    store i8* null, i8** [[TMP238]], align 8
-// CHECK9-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP240:%.*]] = bitcast i8** [[TMP239]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP218]], i32** [[TMP240]], align 8
-// CHECK9-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 4
+// CHECK9-NEXT:    store i32* [[TMP223]], i32** [[TMP240]], align 8
+// CHECK9-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP242:%.*]] = bitcast i8** [[TMP241]] to i32**
-// CHECK9-NEXT:    store i32* [[TMP218]], i32** [[TMP242]], align 8
-// CHECK9-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS82]], i64 0, i64 4
+// CHECK9-NEXT:    store i32* [[TMP223]], i32** [[TMP242]], align 8
+// CHECK9-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS70]], i64 0, i64 3
 // CHECK9-NEXT:    store i8* null, i8** [[TMP243]], align 8
-// CHECK9-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS80]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS81]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS68]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS69]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP246:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_84]], align 4
-// CHECK9-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_84]], align 4
-// CHECK9-NEXT:    [[SUB86:%.*]] = sub nsw i32 [[TMP247]], 0
-// CHECK9-NEXT:    [[DIV87:%.*]] = sdiv i32 [[SUB86]], 1
-// CHECK9-NEXT:    [[SUB88:%.*]] = sub nsw i32 [[DIV87]], 1
-// CHECK9-NEXT:    store i32 [[SUB88]], i32* [[DOTCAPTURE_EXPR_85]], align 4
-// CHECK9-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_85]], align 4
-// CHECK9-NEXT:    [[ADD89:%.*]] = add nsw i32 [[TMP248]], 1
-// CHECK9-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD89]] to i64
+// CHECK9-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_72]], align 4
+// CHECK9-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_72]], align 4
+// CHECK9-NEXT:    [[SUB74:%.*]] = sub nsw i32 [[TMP247]], 0
+// CHECK9-NEXT:    [[DIV75:%.*]] = sdiv i32 [[SUB74]], 1
+// CHECK9-NEXT:    [[SUB76:%.*]] = sub nsw i32 [[DIV75]], 1
+// CHECK9-NEXT:    store i32 [[SUB76]], i32* [[DOTCAPTURE_EXPR_73]], align 4
+// CHECK9-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_73]], align 4
+// CHECK9-NEXT:    [[ADD77:%.*]] = add nsw i32 [[TMP248]], 1
+// CHECK9-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD77]] to i64
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP249]])
-// CHECK9-NEXT:    [[TMP250:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l90.region_id, i32 5, i8** [[TMP244]], i8** [[TMP245]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.52, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.53, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP251:%.*]] = icmp ne i32 [[TMP250]], 0
-// CHECK9-NEXT:    br i1 [[TMP251]], label [[OMP_OFFLOAD_FAILED90:%.*]], label [[OMP_OFFLOAD_CONT91:%.*]]
-// CHECK9:       omp_offload.failed90:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l90(i64 [[TMP213]], i64 [[TMP215]], i32* [[TMP216]], i32* [[TMP217]], i32* [[TMP218]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT91]]
-// CHECK9:       omp_offload.cont91:
+// CHECK9-NEXT:    [[KERNEL_ARGS78:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP250:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP250]], align 4
+// CHECK9-NEXT:    [[TMP251:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 4, i32* [[TMP251]], align 4
+// CHECK9-NEXT:    [[TMP252:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP244]], i8*** [[TMP252]], align 8
+// CHECK9-NEXT:    [[TMP253:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP245]], i8*** [[TMP253]], align 8
+// CHECK9-NEXT:    [[TMP254:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.48, i32 0, i32 0), i64** [[TMP254]], align 8
+// CHECK9-NEXT:    [[TMP255:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.49, i32 0, i32 0), i64** [[TMP255]], align 8
+// CHECK9-NEXT:    [[TMP256:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP256]], align 8
+// CHECK9-NEXT:    [[TMP257:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP257]], align 8
+// CHECK9-NEXT:    [[TMP258:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l82.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS78]])
+// CHECK9-NEXT:    [[TMP259:%.*]] = icmp ne i32 [[TMP258]], 0
+// CHECK9-NEXT:    br i1 [[TMP259]], label [[OMP_OFFLOAD_FAILED79:%.*]], label [[OMP_OFFLOAD_CONT80:%.*]]
+// CHECK9:       omp_offload.failed79:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l82(i64 [[TMP220]], i32* [[TMP221]], i32* [[TMP222]], i32* [[TMP223]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT80]]
+// CHECK9:       omp_offload.cont80:
+// CHECK9-NEXT:    [[TMP260:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK9-NEXT:    [[CONV82:%.*]] = bitcast i64* [[CH_CASTED81]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP260]], i32* [[CONV82]], align 4
+// CHECK9-NEXT:    [[TMP261:%.*]] = load i64, i64* [[CH_CASTED81]], align 8
+// CHECK9-NEXT:    [[TMP262:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[CONV84:%.*]] = bitcast i64* [[N_CASTED83]] to i32*
+// CHECK9-NEXT:    store i32 [[TMP262]], i32* [[CONV84]], align 4
+// CHECK9-NEXT:    [[TMP263:%.*]] = load i64, i64* [[N_CASTED83]], align 8
+// CHECK9-NEXT:    [[TMP264:%.*]] = load i32*, i32** [[A]], align 8
+// CHECK9-NEXT:    [[TMP265:%.*]] = load i32*, i32** [[B]], align 8
+// CHECK9-NEXT:    [[TMP266:%.*]] = load i32*, i32** [[C]], align 8
+// CHECK9-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP268:%.*]] = bitcast i8** [[TMP267]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP261]], i64* [[TMP268]], align 8
+// CHECK9-NEXT:    [[TMP269:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP270:%.*]] = bitcast i8** [[TMP269]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP261]], i64* [[TMP270]], align 8
+// CHECK9-NEXT:    [[TMP271:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP271]], align 8
+// CHECK9-NEXT:    [[TMP272:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP273:%.*]] = bitcast i8** [[TMP272]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP263]], i64* [[TMP273]], align 8
+// CHECK9-NEXT:    [[TMP274:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP275:%.*]] = bitcast i8** [[TMP274]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP263]], i64* [[TMP275]], align 8
+// CHECK9-NEXT:    [[TMP276:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP276]], align 8
+// CHECK9-NEXT:    [[TMP277:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP278:%.*]] = bitcast i8** [[TMP277]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP264]], i32** [[TMP278]], align 8
+// CHECK9-NEXT:    [[TMP279:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP280:%.*]] = bitcast i8** [[TMP279]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP264]], i32** [[TMP280]], align 8
+// CHECK9-NEXT:    [[TMP281:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP281]], align 8
+// CHECK9-NEXT:    [[TMP282:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP283:%.*]] = bitcast i8** [[TMP282]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP265]], i32** [[TMP283]], align 8
+// CHECK9-NEXT:    [[TMP284:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP285:%.*]] = bitcast i8** [[TMP284]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP265]], i32** [[TMP285]], align 8
+// CHECK9-NEXT:    [[TMP286:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP286]], align 8
+// CHECK9-NEXT:    [[TMP287:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP288:%.*]] = bitcast i8** [[TMP287]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP266]], i32** [[TMP288]], align 8
+// CHECK9-NEXT:    [[TMP289:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP290:%.*]] = bitcast i8** [[TMP289]] to i32**
+// CHECK9-NEXT:    store i32* [[TMP266]], i32** [[TMP290]], align 8
+// CHECK9-NEXT:    [[TMP291:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS87]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP291]], align 8
+// CHECK9-NEXT:    [[TMP292:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS85]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP293:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS86]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP294:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP294]], i32* [[DOTCAPTURE_EXPR_89]], align 4
+// CHECK9-NEXT:    [[TMP295:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_89]], align 4
+// CHECK9-NEXT:    [[SUB91:%.*]] = sub nsw i32 [[TMP295]], 0
+// CHECK9-NEXT:    [[DIV92:%.*]] = sdiv i32 [[SUB91]], 1
+// CHECK9-NEXT:    [[SUB93:%.*]] = sub nsw i32 [[DIV92]], 1
+// CHECK9-NEXT:    store i32 [[SUB93]], i32* [[DOTCAPTURE_EXPR_90]], align 4
+// CHECK9-NEXT:    [[TMP296:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_90]], align 4
+// CHECK9-NEXT:    [[ADD94:%.*]] = add nsw i32 [[TMP296]], 1
+// CHECK9-NEXT:    [[TMP297:%.*]] = zext i32 [[ADD94]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP297]])
+// CHECK9-NEXT:    [[KERNEL_ARGS95:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[TMP298:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 0
+// CHECK9-NEXT:    store i32 1, i32* [[TMP298]], align 4
+// CHECK9-NEXT:    [[TMP299:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 5, i32* [[TMP299]], align 4
+// CHECK9-NEXT:    [[TMP300:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 2
+// CHECK9-NEXT:    store i8** [[TMP292]], i8*** [[TMP300]], align 8
+// CHECK9-NEXT:    [[TMP301:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 3
+// CHECK9-NEXT:    store i8** [[TMP293]], i8*** [[TMP301]], align 8
+// CHECK9-NEXT:    [[TMP302:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 4
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.52, i32 0, i32 0), i64** [[TMP302]], align 8
+// CHECK9-NEXT:    [[TMP303:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 5
+// CHECK9-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.53, i32 0, i32 0), i64** [[TMP303]], align 8
+// CHECK9-NEXT:    [[TMP304:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 6
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP304]], align 8
+// CHECK9-NEXT:    [[TMP305:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]], i32 0, i32 7
+// CHECK9-NEXT:    store i8** null, i8*** [[TMP305]], align 8
+// CHECK9-NEXT:    [[TMP306:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l90.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS95]])
+// CHECK9-NEXT:    [[TMP307:%.*]] = icmp ne i32 [[TMP306]], 0
+// CHECK9-NEXT:    br i1 [[TMP307]], label [[OMP_OFFLOAD_FAILED96:%.*]], label [[OMP_OFFLOAD_CONT97:%.*]]
+// CHECK9:       omp_offload.failed96:
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l90(i64 [[TMP261]], i64 [[TMP263]], i32* [[TMP264]], i32* [[TMP265]], i32* [[TMP266]]) #[[ATTR2]]
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT97]]
+// CHECK9:       omp_offload.cont97:
 // CHECK9-NEXT:    ret i32 0
 //
 //
@@ -9450,43 +9688,43 @@ int main() {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[CH_CASTED:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED16:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS17:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS18:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS19:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP20:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED17:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS19:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS20:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED29:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS30:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS31:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS32:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP33:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_35:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[CH_CASTED42:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED43:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS44:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS45:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS46:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP47:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_48:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED56:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS57:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS58:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS59:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP60:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_61:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_62:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[CH_CASTED69:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED70:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS71:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS72:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS73:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP74:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_75:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_76:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED31:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS34:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[CH_CASTED45:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED46:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS48:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS49:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED60:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS63:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[CH_CASTED74:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED75:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS77:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS78:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 10000, i32* [[N]], align 4
 // CHECK11-NEXT:    store i32 100, i32* [[CH]], align 4
@@ -9541,394 +9779,513 @@ int main() {
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
 // CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l368.region_id, i32 4, i8** [[TMP25]], i8** [[TMP26]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP32]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP25]], i8*** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP26]], i8*** [[TMP34]], align 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP38]], align 4
+// CHECK11-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l368.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
+// CHECK11-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l368(i32 [[TMP1]], double* [[TMP2]], double* [[TMP3]], double* [[TMP4]]) #[[ATTR2:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to double**
-// CHECK11-NEXT:    store double* [[TMP35]], double** [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to double**
-// CHECK11-NEXT:    store double* [[TMP35]], double** [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to double**
-// CHECK11-NEXT:    store double* [[TMP36]], double** [[TMP49]], align 4
-// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to double**
-// CHECK11-NEXT:    store double* [[TMP36]], double** [[TMP51]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP45:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[TMP47]], align 4
+// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[TMP49]], align 4
+// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP50]], align 4
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to double**
+// CHECK11-NEXT:    store double* [[TMP43]], double** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to double**
-// CHECK11-NEXT:    store double* [[TMP37]], double** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP56:%.*]] = bitcast i8** [[TMP55]] to double**
-// CHECK11-NEXT:    store double* [[TMP37]], double** [[TMP56]], align 4
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP57]], align 4
-// CHECK11-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP60]], i32* [[DOTCAPTURE_EXPR_8]], align 4
-// CHECK11-NEXT:    [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_8]], align 4
-// CHECK11-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[TMP61]], 0
+// CHECK11-NEXT:    store double* [[TMP43]], double** [[TMP54]], align 4
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP55]], align 4
+// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to double**
+// CHECK11-NEXT:    store double* [[TMP44]], double** [[TMP57]], align 4
+// CHECK11-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to double**
+// CHECK11-NEXT:    store double* [[TMP44]], double** [[TMP59]], align 4
+// CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP60]], align 4
+// CHECK11-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to double**
+// CHECK11-NEXT:    store double* [[TMP45]], double** [[TMP62]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to double**
+// CHECK11-NEXT:    store double* [[TMP45]], double** [[TMP64]], align 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP65]], align 4
+// CHECK11-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[DOTCAPTURE_EXPR_8]], align 4
+// CHECK11-NEXT:    [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_8]], align 4
+// CHECK11-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[TMP69]], 0
 // CHECK11-NEXT:    [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
 // CHECK11-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[DIV11]], 1
 // CHECK11-NEXT:    store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP62]], 1
-// CHECK11-NEXT:    [[TMP63:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP63]])
-// CHECK11-NEXT:    [[TMP64:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l407.region_id, i32 4, i8** [[TMP58]], i8** [[TMP59]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP65:%.*]] = icmp ne i32 [[TMP64]], 0
-// CHECK11-NEXT:    br i1 [[TMP65]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
-// CHECK11:       omp_offload.failed14:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l407(i32 [[TMP34]], double* [[TMP35]], double* [[TMP36]], double* [[TMP37]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
-// CHECK11:       omp_offload.cont15:
-// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[CH_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = load i32, i32* [[CH_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[N_CASTED16]], align 4
-// CHECK11-NEXT:    [[TMP69:%.*]] = load i32, i32* [[N_CASTED16]], align 4
-// CHECK11-NEXT:    [[TMP70:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP71:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP72:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP74]], align 4
-// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP76]], align 4
-// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP77]], align 4
-// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP69]], i32* [[TMP79]], align 4
-// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP69]], i32* [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to double**
-// CHECK11-NEXT:    store double* [[TMP70]], double** [[TMP84]], align 4
-// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to double**
-// CHECK11-NEXT:    store double* [[TMP70]], double** [[TMP86]], align 4
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
-// CHECK11-NEXT:    store double* [[TMP71]], double** [[TMP89]], align 4
-// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to double**
-// CHECK11-NEXT:    store double* [[TMP71]], double** [[TMP91]], align 4
-// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP92]], align 4
-// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK11-NEXT:    store double* [[TMP72]], double** [[TMP94]], align 4
-// CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to double**
-// CHECK11-NEXT:    store double* [[TMP72]], double** [[TMP96]], align 4
-// CHECK11-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP97]], align 4
-// CHECK11-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP100]], i32* [[DOTCAPTURE_EXPR_21]], align 4
-// CHECK11-NEXT:    [[TMP101:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
-// CHECK11-NEXT:    [[SUB23:%.*]] = sub nsw i32 [[TMP101]], 0
-// CHECK11-NEXT:    [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1
-// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[DIV24]], 1
-// CHECK11-NEXT:    store i32 [[SUB25]], i32* [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK11-NEXT:    [[TMP102:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK11-NEXT:    [[ADD26:%.*]] = add nsw i32 [[TMP102]], 1
-// CHECK11-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD26]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP103]])
-// CHECK11-NEXT:    [[TMP104:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l446.region_id, i32 5, i8** [[TMP98]], i8** [[TMP99]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP105:%.*]] = icmp ne i32 [[TMP104]], 0
-// CHECK11-NEXT:    br i1 [[TMP105]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
-// CHECK11:       omp_offload.failed27:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l446(i32 [[TMP67]], i32 [[TMP69]], double* [[TMP70]], double* [[TMP71]], double* [[TMP72]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT28]]
-// CHECK11:       omp_offload.cont28:
-// CHECK11-NEXT:    [[TMP106:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP106]], i32* [[N_CASTED29]], align 4
-// CHECK11-NEXT:    [[TMP107:%.*]] = load i32, i32* [[N_CASTED29]], align 4
-// CHECK11-NEXT:    [[TMP108:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP109:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP110:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP107]], i32* [[TMP112]], align 4
-// CHECK11-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP107]], i32* [[TMP114]], align 4
-// CHECK11-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP115]], align 4
-// CHECK11-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to double**
-// CHECK11-NEXT:    store double* [[TMP108]], double** [[TMP117]], align 4
-// CHECK11-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to double**
-// CHECK11-NEXT:    store double* [[TMP108]], double** [[TMP119]], align 4
-// CHECK11-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP120]], align 4
-// CHECK11-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to double**
-// CHECK11-NEXT:    store double* [[TMP109]], double** [[TMP122]], align 4
-// CHECK11-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to double**
-// CHECK11-NEXT:    store double* [[TMP109]], double** [[TMP124]], align 4
-// CHECK11-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK11-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
-// CHECK11-NEXT:    store double* [[TMP110]], double** [[TMP127]], align 4
-// CHECK11-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to double**
-// CHECK11-NEXT:    store double* [[TMP110]], double** [[TMP129]], align 4
-// CHECK11-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK11-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR_34]], align 4
-// CHECK11-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
-// CHECK11-NEXT:    [[SUB36:%.*]] = sub nsw i32 [[TMP134]], 0
-// CHECK11-NEXT:    [[DIV37:%.*]] = sdiv i32 [[SUB36]], 1
-// CHECK11-NEXT:    [[SUB38:%.*]] = sub nsw i32 [[DIV37]], 1
-// CHECK11-NEXT:    store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_35]], align 4
-// CHECK11-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_35]], align 4
-// CHECK11-NEXT:    [[ADD39:%.*]] = add nsw i32 [[TMP135]], 1
-// CHECK11-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD39]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP136]])
-// CHECK11-NEXT:    [[TMP137:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l477.region_id, i32 4, i8** [[TMP131]], i8** [[TMP132]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP138:%.*]] = icmp ne i32 [[TMP137]], 0
-// CHECK11-NEXT:    br i1 [[TMP138]], label [[OMP_OFFLOAD_FAILED40:%.*]], label [[OMP_OFFLOAD_CONT41:%.*]]
-// CHECK11:       omp_offload.failed40:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l477(i32 [[TMP107]], double* [[TMP108]], double* [[TMP109]], double* [[TMP110]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT41]]
-// CHECK11:       omp_offload.cont41:
-// CHECK11-NEXT:    [[TMP139:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP139]], i32* [[CH_CASTED42]], align 4
-// CHECK11-NEXT:    [[TMP140:%.*]] = load i32, i32* [[CH_CASTED42]], align 4
-// CHECK11-NEXT:    [[TMP141:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP141]], i32* [[N_CASTED43]], align 4
-// CHECK11-NEXT:    [[TMP142:%.*]] = load i32, i32* [[N_CASTED43]], align 4
-// CHECK11-NEXT:    [[TMP143:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP144:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP145:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP140]], i32* [[TMP147]], align 4
-// CHECK11-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP140]], i32* [[TMP149]], align 4
-// CHECK11-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP150]], align 4
-// CHECK11-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP142]], i32* [[TMP152]], align 4
-// CHECK11-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP142]], i32* [[TMP154]], align 4
-// CHECK11-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP155]], align 4
-// CHECK11-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to double**
-// CHECK11-NEXT:    store double* [[TMP143]], double** [[TMP157]], align 4
-// CHECK11-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to double**
-// CHECK11-NEXT:    store double* [[TMP143]], double** [[TMP159]], align 4
-// CHECK11-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP160]], align 4
-// CHECK11-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to double**
-// CHECK11-NEXT:    store double* [[TMP144]], double** [[TMP162]], align 4
-// CHECK11-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to double**
-// CHECK11-NEXT:    store double* [[TMP144]], double** [[TMP164]], align 4
-// CHECK11-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK11-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP167:%.*]] = bitcast i8** [[TMP166]] to double**
-// CHECK11-NEXT:    store double* [[TMP145]], double** [[TMP167]], align 4
-// CHECK11-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP169:%.*]] = bitcast i8** [[TMP168]] to double**
-// CHECK11-NEXT:    store double* [[TMP145]], double** [[TMP169]], align 4
-// CHECK11-NEXT:    [[TMP170:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP170]], align 4
-// CHECK11-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP70]], 1
+// CHECK11-NEXT:    [[TMP71:%.*]] = zext i32 [[ADD13]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP71]])
+// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP72]], align 4
+// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP66]], i8*** [[TMP74]], align 4
+// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP67]], i8*** [[TMP75]], align 4
+// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64** [[TMP76]], align 4
+// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i64** [[TMP77]], align 4
+// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP78]], align 4
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP79]], align 4
+// CHECK11-NEXT:    [[TMP80:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l407.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]])
+// CHECK11-NEXT:    [[TMP81:%.*]] = icmp ne i32 [[TMP80]], 0
+// CHECK11-NEXT:    br i1 [[TMP81]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11:       omp_offload.failed15:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l407(i32 [[TMP42]], double* [[TMP43]], double* [[TMP44]], double* [[TMP45]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
+// CHECK11:       omp_offload.cont16:
+// CHECK11-NEXT:    [[TMP82:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP82]], i32* [[CH_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP83:%.*]] = load i32, i32* [[CH_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP84]], i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP85:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP86:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP87:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP88:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP83]], i32* [[TMP90]], align 4
+// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP83]], i32* [[TMP92]], align 4
+// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP93]], align 4
+// CHECK11-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP85]], i32* [[TMP95]], align 4
+// CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP85]], i32* [[TMP97]], align 4
+// CHECK11-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP98]], align 4
+// CHECK11-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to double**
+// CHECK11-NEXT:    store double* [[TMP86]], double** [[TMP100]], align 4
+// CHECK11-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to double**
+// CHECK11-NEXT:    store double* [[TMP86]], double** [[TMP102]], align 4
+// CHECK11-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP103]], align 4
+// CHECK11-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to double**
+// CHECK11-NEXT:    store double* [[TMP87]], double** [[TMP105]], align 4
+// CHECK11-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to double**
+// CHECK11-NEXT:    store double* [[TMP87]], double** [[TMP107]], align 4
+// CHECK11-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK11-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to double**
+// CHECK11-NEXT:    store double* [[TMP88]], double** [[TMP110]], align 4
+// CHECK11-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to double**
+// CHECK11-NEXT:    store double* [[TMP88]], double** [[TMP112]], align 4
+// CHECK11-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK11-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP116:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP116]], i32* [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK11-NEXT:    [[TMP117:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK11-NEXT:    [[SUB24:%.*]] = sub nsw i32 [[TMP117]], 0
+// CHECK11-NEXT:    [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
+// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[DIV25]], 1
+// CHECK11-NEXT:    store i32 [[SUB26]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[TMP118:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP118]], 1
+// CHECK11-NEXT:    [[TMP119:%.*]] = zext i32 [[ADD27]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP119]])
+// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP120]], align 4
+// CHECK11-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP121]], align 4
+// CHECK11-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP114]], i8*** [[TMP122]], align 4
+// CHECK11-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP115]], i8*** [[TMP123]], align 4
+// CHECK11-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.8, i32 0, i32 0), i64** [[TMP124]], align 4
+// CHECK11-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.9, i32 0, i32 0), i64** [[TMP125]], align 4
+// CHECK11-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP126]], align 4
+// CHECK11-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP127]], align 4
+// CHECK11-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l446.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]])
+// CHECK11-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK11-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK11:       omp_offload.failed29:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l446(i32 [[TMP83]], i32 [[TMP85]], double* [[TMP86]], double* [[TMP87]], double* [[TMP88]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
+// CHECK11:       omp_offload.cont30:
+// CHECK11-NEXT:    [[TMP130:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP130]], i32* [[N_CASTED31]], align 4
+// CHECK11-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK11-NEXT:    [[TMP132:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP133:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP134:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP131]], i32* [[TMP136]], align 4
+// CHECK11-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP131]], i32* [[TMP138]], align 4
+// CHECK11-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP139]], align 4
+// CHECK11-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
+// CHECK11-NEXT:    store double* [[TMP132]], double** [[TMP141]], align 4
+// CHECK11-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
+// CHECK11-NEXT:    store double* [[TMP132]], double** [[TMP143]], align 4
+// CHECK11-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP144]], align 4
+// CHECK11-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to double**
+// CHECK11-NEXT:    store double* [[TMP133]], double** [[TMP146]], align 4
+// CHECK11-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to double**
+// CHECK11-NEXT:    store double* [[TMP133]], double** [[TMP148]], align 4
+// CHECK11-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP149]], align 4
+// CHECK11-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to double**
+// CHECK11-NEXT:    store double* [[TMP134]], double** [[TMP151]], align 4
+// CHECK11-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to double**
+// CHECK11-NEXT:    store double* [[TMP134]], double** [[TMP153]], align 4
+// CHECK11-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP154]], align 4
+// CHECK11-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP157:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP157]], i32* [[DOTCAPTURE_EXPR_36]], align 4
+// CHECK11-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_36]], align 4
+// CHECK11-NEXT:    [[SUB38:%.*]] = sub nsw i32 [[TMP158]], 0
+// CHECK11-NEXT:    [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1
+// CHECK11-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[DIV39]], 1
+// CHECK11-NEXT:    store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK11-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK11-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP159]], 1
+// CHECK11-NEXT:    [[TMP160:%.*]] = zext i32 [[ADD41]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP160]])
+// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP161]], align 4
+// CHECK11-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP162]], align 4
+// CHECK11-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP155]], i8*** [[TMP163]], align 4
+// CHECK11-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP156]], i8*** [[TMP164]], align 4
+// CHECK11-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64** [[TMP165]], align 4
+// CHECK11-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i64** [[TMP166]], align 4
+// CHECK11-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP167]], align 4
+// CHECK11-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP168]], align 4
+// CHECK11-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l477.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]])
+// CHECK11-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
+// CHECK11-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK11:       omp_offload.failed43:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l477(i32 [[TMP131]], double* [[TMP132]], double* [[TMP133]], double* [[TMP134]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
+// CHECK11:       omp_offload.cont44:
+// CHECK11-NEXT:    [[TMP171:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP171]], i32* [[CH_CASTED45]], align 4
+// CHECK11-NEXT:    [[TMP172:%.*]] = load i32, i32* [[CH_CASTED45]], align 4
 // CHECK11-NEXT:    [[TMP173:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP173]], i32* [[DOTCAPTURE_EXPR_48]], align 4
-// CHECK11-NEXT:    [[TMP174:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_48]], align 4
-// CHECK11-NEXT:    [[SUB50:%.*]] = sub nsw i32 [[TMP174]], 0
-// CHECK11-NEXT:    [[DIV51:%.*]] = sdiv i32 [[SUB50]], 1
-// CHECK11-NEXT:    [[SUB52:%.*]] = sub nsw i32 [[DIV51]], 1
-// CHECK11-NEXT:    store i32 [[SUB52]], i32* [[DOTCAPTURE_EXPR_49]], align 4
-// CHECK11-NEXT:    [[TMP175:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_49]], align 4
-// CHECK11-NEXT:    [[ADD53:%.*]] = add nsw i32 [[TMP175]], 1
-// CHECK11-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD53]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP176]])
-// CHECK11-NEXT:    [[TMP177:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l505.region_id, i32 5, i8** [[TMP171]], i8** [[TMP172]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP178:%.*]] = icmp ne i32 [[TMP177]], 0
-// CHECK11-NEXT:    br i1 [[TMP178]], label [[OMP_OFFLOAD_FAILED54:%.*]], label [[OMP_OFFLOAD_CONT55:%.*]]
-// CHECK11:       omp_offload.failed54:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l505(i32 [[TMP140]], i32 [[TMP142]], double* [[TMP143]], double* [[TMP144]], double* [[TMP145]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT55]]
-// CHECK11:       omp_offload.cont55:
-// CHECK11-NEXT:    [[TMP179:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP179]], i32* [[N_CASTED56]], align 4
-// CHECK11-NEXT:    [[TMP180:%.*]] = load i32, i32* [[N_CASTED56]], align 4
-// CHECK11-NEXT:    [[TMP181:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP182:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP183:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP184:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP185:%.*]] = bitcast i8** [[TMP184]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP180]], i32* [[TMP185]], align 4
-// CHECK11-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP187:%.*]] = bitcast i8** [[TMP186]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP180]], i32* [[TMP187]], align 4
-// CHECK11-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP188]], align 4
-// CHECK11-NEXT:    [[TMP189:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP190:%.*]] = bitcast i8** [[TMP189]] to double**
-// CHECK11-NEXT:    store double* [[TMP181]], double** [[TMP190]], align 4
-// CHECK11-NEXT:    [[TMP191:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP192:%.*]] = bitcast i8** [[TMP191]] to double**
-// CHECK11-NEXT:    store double* [[TMP181]], double** [[TMP192]], align 4
-// CHECK11-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP193]], align 4
-// CHECK11-NEXT:    [[TMP194:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP195:%.*]] = bitcast i8** [[TMP194]] to double**
-// CHECK11-NEXT:    store double* [[TMP182]], double** [[TMP195]], align 4
-// CHECK11-NEXT:    [[TMP196:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP197:%.*]] = bitcast i8** [[TMP196]] to double**
-// CHECK11-NEXT:    store double* [[TMP182]], double** [[TMP197]], align 4
-// CHECK11-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP198]], align 4
-// CHECK11-NEXT:    [[TMP199:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP200:%.*]] = bitcast i8** [[TMP199]] to double**
-// CHECK11-NEXT:    store double* [[TMP183]], double** [[TMP200]], align 4
-// CHECK11-NEXT:    [[TMP201:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP202:%.*]] = bitcast i8** [[TMP201]] to double**
-// CHECK11-NEXT:    store double* [[TMP183]], double** [[TMP202]], align 4
-// CHECK11-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP203]], align 4
-// CHECK11-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP205:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP206:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP206]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK11-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK11-NEXT:    [[SUB63:%.*]] = sub nsw i32 [[TMP207]], 0
-// CHECK11-NEXT:    [[DIV64:%.*]] = sdiv i32 [[SUB63]], 1
-// CHECK11-NEXT:    [[SUB65:%.*]] = sub nsw i32 [[DIV64]], 1
-// CHECK11-NEXT:    store i32 [[SUB65]], i32* [[DOTCAPTURE_EXPR_62]], align 4
-// CHECK11-NEXT:    [[TMP208:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_62]], align 4
-// CHECK11-NEXT:    [[ADD66:%.*]] = add nsw i32 [[TMP208]], 1
-// CHECK11-NEXT:    [[TMP209:%.*]] = zext i32 [[ADD66]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP209]])
-// CHECK11-NEXT:    [[TMP210:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l535.region_id, i32 4, i8** [[TMP204]], i8** [[TMP205]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP211:%.*]] = icmp ne i32 [[TMP210]], 0
-// CHECK11-NEXT:    br i1 [[TMP211]], label [[OMP_OFFLOAD_FAILED67:%.*]], label [[OMP_OFFLOAD_CONT68:%.*]]
-// CHECK11:       omp_offload.failed67:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l535(i32 [[TMP180]], double* [[TMP181]], double* [[TMP182]], double* [[TMP183]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT68]]
-// CHECK11:       omp_offload.cont68:
-// CHECK11-NEXT:    [[TMP212:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP212]], i32* [[CH_CASTED69]], align 4
-// CHECK11-NEXT:    [[TMP213:%.*]] = load i32, i32* [[CH_CASTED69]], align 4
-// CHECK11-NEXT:    [[TMP214:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP214]], i32* [[N_CASTED70]], align 4
-// CHECK11-NEXT:    [[TMP215:%.*]] = load i32, i32* [[N_CASTED70]], align 4
-// CHECK11-NEXT:    [[TMP216:%.*]] = load double*, double** [[A]], align 4
-// CHECK11-NEXT:    [[TMP217:%.*]] = load double*, double** [[B]], align 4
-// CHECK11-NEXT:    [[TMP218:%.*]] = load double*, double** [[C]], align 4
-// CHECK11-NEXT:    [[TMP219:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP220:%.*]] = bitcast i8** [[TMP219]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP213]], i32* [[TMP220]], align 4
-// CHECK11-NEXT:    [[TMP221:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP222:%.*]] = bitcast i8** [[TMP221]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP213]], i32* [[TMP222]], align 4
-// CHECK11-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP223]], align 4
-// CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP173]], i32* [[N_CASTED46]], align 4
+// CHECK11-NEXT:    [[TMP174:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK11-NEXT:    [[TMP175:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP176:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP177:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP179:%.*]] = bitcast i8** [[TMP178]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP172]], i32* [[TMP179]], align 4
+// CHECK11-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP181:%.*]] = bitcast i8** [[TMP180]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP172]], i32* [[TMP181]], align 4
+// CHECK11-NEXT:    [[TMP182:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP182]], align 4
+// CHECK11-NEXT:    [[TMP183:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP184:%.*]] = bitcast i8** [[TMP183]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP174]], i32* [[TMP184]], align 4
+// CHECK11-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP186:%.*]] = bitcast i8** [[TMP185]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP174]], i32* [[TMP186]], align 4
+// CHECK11-NEXT:    [[TMP187:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP187]], align 4
+// CHECK11-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP189:%.*]] = bitcast i8** [[TMP188]] to double**
+// CHECK11-NEXT:    store double* [[TMP175]], double** [[TMP189]], align 4
+// CHECK11-NEXT:    [[TMP190:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP191:%.*]] = bitcast i8** [[TMP190]] to double**
+// CHECK11-NEXT:    store double* [[TMP175]], double** [[TMP191]], align 4
+// CHECK11-NEXT:    [[TMP192:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP192]], align 4
+// CHECK11-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP194:%.*]] = bitcast i8** [[TMP193]] to double**
+// CHECK11-NEXT:    store double* [[TMP176]], double** [[TMP194]], align 4
+// CHECK11-NEXT:    [[TMP195:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP196:%.*]] = bitcast i8** [[TMP195]] to double**
+// CHECK11-NEXT:    store double* [[TMP176]], double** [[TMP196]], align 4
+// CHECK11-NEXT:    [[TMP197:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP197]], align 4
+// CHECK11-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP199:%.*]] = bitcast i8** [[TMP198]] to double**
+// CHECK11-NEXT:    store double* [[TMP177]], double** [[TMP199]], align 4
+// CHECK11-NEXT:    [[TMP200:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP201:%.*]] = bitcast i8** [[TMP200]] to double**
+// CHECK11-NEXT:    store double* [[TMP177]], double** [[TMP201]], align 4
+// CHECK11-NEXT:    [[TMP202:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP202]], align 4
+// CHECK11-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP205:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP205]], i32* [[DOTCAPTURE_EXPR_51]], align 4
+// CHECK11-NEXT:    [[TMP206:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_51]], align 4
+// CHECK11-NEXT:    [[SUB53:%.*]] = sub nsw i32 [[TMP206]], 0
+// CHECK11-NEXT:    [[DIV54:%.*]] = sdiv i32 [[SUB53]], 1
+// CHECK11-NEXT:    [[SUB55:%.*]] = sub nsw i32 [[DIV54]], 1
+// CHECK11-NEXT:    store i32 [[SUB55]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK11-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK11-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP207]], 1
+// CHECK11-NEXT:    [[TMP208:%.*]] = zext i32 [[ADD56]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP208]])
+// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP209:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP209]], align 4
+// CHECK11-NEXT:    [[TMP210:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP210]], align 4
+// CHECK11-NEXT:    [[TMP211:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP203]], i8*** [[TMP211]], align 4
+// CHECK11-NEXT:    [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP204]], i8*** [[TMP212]], align 4
+// CHECK11-NEXT:    [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64** [[TMP213]], align 4
+// CHECK11-NEXT:    [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i64** [[TMP214]], align 4
+// CHECK11-NEXT:    [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP215]], align 4
+// CHECK11-NEXT:    [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP216]], align 4
+// CHECK11-NEXT:    [[TMP217:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l505.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]])
+// CHECK11-NEXT:    [[TMP218:%.*]] = icmp ne i32 [[TMP217]], 0
+// CHECK11-NEXT:    br i1 [[TMP218]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK11:       omp_offload.failed58:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l505(i32 [[TMP172]], i32 [[TMP174]], double* [[TMP175]], double* [[TMP176]], double* [[TMP177]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
+// CHECK11:       omp_offload.cont59:
+// CHECK11-NEXT:    [[TMP219:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP219]], i32* [[N_CASTED60]], align 4
+// CHECK11-NEXT:    [[TMP220:%.*]] = load i32, i32* [[N_CASTED60]], align 4
+// CHECK11-NEXT:    [[TMP221:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP222:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP223:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP225:%.*]] = bitcast i8** [[TMP224]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP215]], i32* [[TMP225]], align 4
-// CHECK11-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP220]], i32* [[TMP225]], align 4
+// CHECK11-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP227:%.*]] = bitcast i8** [[TMP226]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP215]], i32* [[TMP227]], align 4
-// CHECK11-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP220]], i32* [[TMP227]], align 4
+// CHECK11-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP228]], align 4
-// CHECK11-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP230:%.*]] = bitcast i8** [[TMP229]] to double**
-// CHECK11-NEXT:    store double* [[TMP216]], double** [[TMP230]], align 4
-// CHECK11-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 2
+// CHECK11-NEXT:    store double* [[TMP221]], double** [[TMP230]], align 4
+// CHECK11-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP232:%.*]] = bitcast i8** [[TMP231]] to double**
-// CHECK11-NEXT:    store double* [[TMP216]], double** [[TMP232]], align 4
-// CHECK11-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 2
+// CHECK11-NEXT:    store double* [[TMP221]], double** [[TMP232]], align 4
+// CHECK11-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 1
 // CHECK11-NEXT:    store i8* null, i8** [[TMP233]], align 4
-// CHECK11-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP235:%.*]] = bitcast i8** [[TMP234]] to double**
-// CHECK11-NEXT:    store double* [[TMP217]], double** [[TMP235]], align 4
-// CHECK11-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 3
+// CHECK11-NEXT:    store double* [[TMP222]], double** [[TMP235]], align 4
+// CHECK11-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP237:%.*]] = bitcast i8** [[TMP236]] to double**
-// CHECK11-NEXT:    store double* [[TMP217]], double** [[TMP237]], align 4
-// CHECK11-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 3
+// CHECK11-NEXT:    store double* [[TMP222]], double** [[TMP237]], align 4
+// CHECK11-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 2
 // CHECK11-NEXT:    store i8* null, i8** [[TMP238]], align 4
-// CHECK11-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP240:%.*]] = bitcast i8** [[TMP239]] to double**
-// CHECK11-NEXT:    store double* [[TMP218]], double** [[TMP240]], align 4
-// CHECK11-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 4
+// CHECK11-NEXT:    store double* [[TMP223]], double** [[TMP240]], align 4
+// CHECK11-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP242:%.*]] = bitcast i8** [[TMP241]] to double**
-// CHECK11-NEXT:    store double* [[TMP218]], double** [[TMP242]], align 4
-// CHECK11-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 4
+// CHECK11-NEXT:    store double* [[TMP223]], double** [[TMP242]], align 4
+// CHECK11-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 3
 // CHECK11-NEXT:    store i8* null, i8** [[TMP243]], align 4
-// CHECK11-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP246:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_75]], align 4
-// CHECK11-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_75]], align 4
-// CHECK11-NEXT:    [[SUB77:%.*]] = sub nsw i32 [[TMP247]], 0
-// CHECK11-NEXT:    [[DIV78:%.*]] = sdiv i32 [[SUB77]], 1
-// CHECK11-NEXT:    [[SUB79:%.*]] = sub nsw i32 [[DIV78]], 1
-// CHECK11-NEXT:    store i32 [[SUB79]], i32* [[DOTCAPTURE_EXPR_76]], align 4
-// CHECK11-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_76]], align 4
-// CHECK11-NEXT:    [[ADD80:%.*]] = add nsw i32 [[TMP248]], 1
-// CHECK11-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD80]] to i64
+// CHECK11-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_65]], align 4
+// CHECK11-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_65]], align 4
+// CHECK11-NEXT:    [[SUB67:%.*]] = sub nsw i32 [[TMP247]], 0
+// CHECK11-NEXT:    [[DIV68:%.*]] = sdiv i32 [[SUB67]], 1
+// CHECK11-NEXT:    [[SUB69:%.*]] = sub nsw i32 [[DIV68]], 1
+// CHECK11-NEXT:    store i32 [[SUB69]], i32* [[DOTCAPTURE_EXPR_66]], align 4
+// CHECK11-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_66]], align 4
+// CHECK11-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP248]], 1
+// CHECK11-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD70]] to i64
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP249]])
-// CHECK11-NEXT:    [[TMP250:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l561.region_id, i32 5, i8** [[TMP244]], i8** [[TMP245]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP251:%.*]] = icmp ne i32 [[TMP250]], 0
-// CHECK11-NEXT:    br i1 [[TMP251]], label [[OMP_OFFLOAD_FAILED81:%.*]], label [[OMP_OFFLOAD_CONT82:%.*]]
-// CHECK11:       omp_offload.failed81:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l561(i32 [[TMP213]], i32 [[TMP215]], double* [[TMP216]], double* [[TMP217]], double* [[TMP218]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT82]]
-// CHECK11:       omp_offload.cont82:
+// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP250:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP250]], align 4
+// CHECK11-NEXT:    [[TMP251:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP251]], align 4
+// CHECK11-NEXT:    [[TMP252:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP244]], i8*** [[TMP252]], align 4
+// CHECK11-NEXT:    [[TMP253:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP245]], i8*** [[TMP253]], align 4
+// CHECK11-NEXT:    [[TMP254:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.20, i32 0, i32 0), i64** [[TMP254]], align 4
+// CHECK11-NEXT:    [[TMP255:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.21, i32 0, i32 0), i64** [[TMP255]], align 4
+// CHECK11-NEXT:    [[TMP256:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP256]], align 4
+// CHECK11-NEXT:    [[TMP257:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP257]], align 4
+// CHECK11-NEXT:    [[TMP258:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l535.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]])
+// CHECK11-NEXT:    [[TMP259:%.*]] = icmp ne i32 [[TMP258]], 0
+// CHECK11-NEXT:    br i1 [[TMP259]], label [[OMP_OFFLOAD_FAILED72:%.*]], label [[OMP_OFFLOAD_CONT73:%.*]]
+// CHECK11:       omp_offload.failed72:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l535(i32 [[TMP220]], double* [[TMP221]], double* [[TMP222]], double* [[TMP223]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT73]]
+// CHECK11:       omp_offload.cont73:
+// CHECK11-NEXT:    [[TMP260:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP260]], i32* [[CH_CASTED74]], align 4
+// CHECK11-NEXT:    [[TMP261:%.*]] = load i32, i32* [[CH_CASTED74]], align 4
+// CHECK11-NEXT:    [[TMP262:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP262]], i32* [[N_CASTED75]], align 4
+// CHECK11-NEXT:    [[TMP263:%.*]] = load i32, i32* [[N_CASTED75]], align 4
+// CHECK11-NEXT:    [[TMP264:%.*]] = load double*, double** [[A]], align 4
+// CHECK11-NEXT:    [[TMP265:%.*]] = load double*, double** [[B]], align 4
+// CHECK11-NEXT:    [[TMP266:%.*]] = load double*, double** [[C]], align 4
+// CHECK11-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP268:%.*]] = bitcast i8** [[TMP267]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP261]], i32* [[TMP268]], align 4
+// CHECK11-NEXT:    [[TMP269:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP270:%.*]] = bitcast i8** [[TMP269]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP261]], i32* [[TMP270]], align 4
+// CHECK11-NEXT:    [[TMP271:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP271]], align 4
+// CHECK11-NEXT:    [[TMP272:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP273:%.*]] = bitcast i8** [[TMP272]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP263]], i32* [[TMP273]], align 4
+// CHECK11-NEXT:    [[TMP274:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP275:%.*]] = bitcast i8** [[TMP274]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP263]], i32* [[TMP275]], align 4
+// CHECK11-NEXT:    [[TMP276:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP276]], align 4
+// CHECK11-NEXT:    [[TMP277:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP278:%.*]] = bitcast i8** [[TMP277]] to double**
+// CHECK11-NEXT:    store double* [[TMP264]], double** [[TMP278]], align 4
+// CHECK11-NEXT:    [[TMP279:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP280:%.*]] = bitcast i8** [[TMP279]] to double**
+// CHECK11-NEXT:    store double* [[TMP264]], double** [[TMP280]], align 4
+// CHECK11-NEXT:    [[TMP281:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP281]], align 4
+// CHECK11-NEXT:    [[TMP282:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP283:%.*]] = bitcast i8** [[TMP282]] to double**
+// CHECK11-NEXT:    store double* [[TMP265]], double** [[TMP283]], align 4
+// CHECK11-NEXT:    [[TMP284:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP285:%.*]] = bitcast i8** [[TMP284]] to double**
+// CHECK11-NEXT:    store double* [[TMP265]], double** [[TMP285]], align 4
+// CHECK11-NEXT:    [[TMP286:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP286]], align 4
+// CHECK11-NEXT:    [[TMP287:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP288:%.*]] = bitcast i8** [[TMP287]] to double**
+// CHECK11-NEXT:    store double* [[TMP266]], double** [[TMP288]], align 4
+// CHECK11-NEXT:    [[TMP289:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP290:%.*]] = bitcast i8** [[TMP289]] to double**
+// CHECK11-NEXT:    store double* [[TMP266]], double** [[TMP290]], align 4
+// CHECK11-NEXT:    [[TMP291:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP291]], align 4
+// CHECK11-NEXT:    [[TMP292:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP293:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP294:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP294]], i32* [[DOTCAPTURE_EXPR_80]], align 4
+// CHECK11-NEXT:    [[TMP295:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_80]], align 4
+// CHECK11-NEXT:    [[SUB82:%.*]] = sub nsw i32 [[TMP295]], 0
+// CHECK11-NEXT:    [[DIV83:%.*]] = sdiv i32 [[SUB82]], 1
+// CHECK11-NEXT:    [[SUB84:%.*]] = sub nsw i32 [[DIV83]], 1
+// CHECK11-NEXT:    store i32 [[SUB84]], i32* [[DOTCAPTURE_EXPR_81]], align 4
+// CHECK11-NEXT:    [[TMP296:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_81]], align 4
+// CHECK11-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP296]], 1
+// CHECK11-NEXT:    [[TMP297:%.*]] = zext i32 [[ADD85]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP297]])
+// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP298:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP298]], align 4
+// CHECK11-NEXT:    [[TMP299:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP299]], align 4
+// CHECK11-NEXT:    [[TMP300:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP292]], i8*** [[TMP300]], align 4
+// CHECK11-NEXT:    [[TMP301:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP293]], i8*** [[TMP301]], align 4
+// CHECK11-NEXT:    [[TMP302:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.24, i32 0, i32 0), i64** [[TMP302]], align 4
+// CHECK11-NEXT:    [[TMP303:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i64** [[TMP303]], align 4
+// CHECK11-NEXT:    [[TMP304:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP304]], align 4
+// CHECK11-NEXT:    [[TMP305:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP305]], align 4
+// CHECK11-NEXT:    [[TMP306:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l561.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]])
+// CHECK11-NEXT:    [[TMP307:%.*]] = icmp ne i32 [[TMP306]], 0
+// CHECK11-NEXT:    br i1 [[TMP307]], label [[OMP_OFFLOAD_FAILED87:%.*]], label [[OMP_OFFLOAD_CONT88:%.*]]
+// CHECK11:       omp_offload.failed87:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l561(i32 [[TMP261]], i32 [[TMP263]], double* [[TMP264]], double* [[TMP265]], double* [[TMP266]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT88]]
+// CHECK11:       omp_offload.cont88:
 // CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
 // CHECK11-NEXT:    ret i32 [[CALL]]
 //
@@ -11747,43 +12104,43 @@ int main() {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[CH_CASTED:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED16:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS17:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS18:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS19:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP20:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_21:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED17:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS19:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS20:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED29:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS30:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS31:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS32:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP33:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_35:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[CH_CASTED42:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED43:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS44:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS45:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS46:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP47:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_48:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED56:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS57:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS58:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS59:%.*]] = alloca [4 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP60:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_61:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_62:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[CH_CASTED69:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[N_CASTED70:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS71:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS72:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS73:%.*]] = alloca [5 x i8*], align 4
-// CHECK11-NEXT:    [[_TMP74:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_75:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_76:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED31:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS34:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[CH_CASTED45:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED46:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS48:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS49:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED60:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS63:%.*]] = alloca [4 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[CH_CASTED74:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[N_CASTED75:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_PTRS77:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS78:%.*]] = alloca [5 x i8*], align 4
+// CHECK11-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 10000, i32* [[N]], align 4
 // CHECK11-NEXT:    store i32 100, i32* [[CH]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, i32* [[N]], align 4
@@ -11837,394 +12194,513 @@ int main() {
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
 // CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42.region_id, i32 4, i8** [[TMP25]], i8** [[TMP26]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP32]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP25]], i8*** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP26]], i8*** [[TMP34]], align 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.28, i32 0, i32 0), i64** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.29, i32 0, i32 0), i64** [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP38]], align 4
+// CHECK11-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK11-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
+// CHECK11-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l42(i32 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]], i32* [[TMP4]]) #[[ATTR2]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP35]], i32** [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP35]], i32** [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP36]], i32** [[TMP49]], align 4
-// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP36]], i32** [[TMP51]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP45:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[TMP47]], align 4
+// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[TMP49]], align 4
+// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP50]], align 4
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP43]], i32** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP37]], i32** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP37]], i32** [[TMP56]], align 4
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP57]], align 4
-// CHECK11-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP60]], i32* [[DOTCAPTURE_EXPR_8]], align 4
-// CHECK11-NEXT:    [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_8]], align 4
-// CHECK11-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[TMP61]], 0
+// CHECK11-NEXT:    store i32* [[TMP43]], i32** [[TMP54]], align 4
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP55]], align 4
+// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP44]], i32** [[TMP57]], align 4
+// CHECK11-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP44]], i32** [[TMP59]], align 4
+// CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP60]], align 4
+// CHECK11-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP45]], i32** [[TMP62]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP45]], i32** [[TMP64]], align 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP65]], align 4
+// CHECK11-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[DOTCAPTURE_EXPR_8]], align 4
+// CHECK11-NEXT:    [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_8]], align 4
+// CHECK11-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[TMP69]], 0
 // CHECK11-NEXT:    [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
 // CHECK11-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[DIV11]], 1
 // CHECK11-NEXT:    store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP62]], 1
-// CHECK11-NEXT:    [[TMP63:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP63]])
-// CHECK11-NEXT:    [[TMP64:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50.region_id, i32 4, i8** [[TMP58]], i8** [[TMP59]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP65:%.*]] = icmp ne i32 [[TMP64]], 0
-// CHECK11-NEXT:    br i1 [[TMP65]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
-// CHECK11:       omp_offload.failed14:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50(i32 [[TMP34]], i32* [[TMP35]], i32* [[TMP36]], i32* [[TMP37]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT15]]
-// CHECK11:       omp_offload.cont15:
-// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[CH_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = load i32, i32* [[CH_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[N_CASTED16]], align 4
-// CHECK11-NEXT:    [[TMP69:%.*]] = load i32, i32* [[N_CASTED16]], align 4
-// CHECK11-NEXT:    [[TMP70:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP71:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP72:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP74]], align 4
-// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP76]], align 4
-// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP77]], align 4
-// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP69]], i32* [[TMP79]], align 4
-// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP69]], i32* [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP70]], i32** [[TMP84]], align 4
-// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP70]], i32** [[TMP86]], align 4
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP71]], i32** [[TMP89]], align 4
-// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP71]], i32** [[TMP91]], align 4
-// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP92]], align 4
-// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP72]], i32** [[TMP94]], align 4
-// CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP72]], i32** [[TMP96]], align 4
-// CHECK11-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS19]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP97]], align 4
-// CHECK11-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS17]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP100]], i32* [[DOTCAPTURE_EXPR_21]], align 4
-// CHECK11-NEXT:    [[TMP101:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4
-// CHECK11-NEXT:    [[SUB23:%.*]] = sub nsw i32 [[TMP101]], 0
-// CHECK11-NEXT:    [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1
-// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[DIV24]], 1
-// CHECK11-NEXT:    store i32 [[SUB25]], i32* [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK11-NEXT:    [[TMP102:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_22]], align 4
-// CHECK11-NEXT:    [[ADD26:%.*]] = add nsw i32 [[TMP102]], 1
-// CHECK11-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD26]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP103]])
-// CHECK11-NEXT:    [[TMP104:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l58.region_id, i32 5, i8** [[TMP98]], i8** [[TMP99]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP105:%.*]] = icmp ne i32 [[TMP104]], 0
-// CHECK11-NEXT:    br i1 [[TMP105]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
-// CHECK11:       omp_offload.failed27:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l58(i32 [[TMP67]], i32 [[TMP69]], i32* [[TMP70]], i32* [[TMP71]], i32* [[TMP72]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT28]]
-// CHECK11:       omp_offload.cont28:
-// CHECK11-NEXT:    [[TMP106:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP106]], i32* [[N_CASTED29]], align 4
-// CHECK11-NEXT:    [[TMP107:%.*]] = load i32, i32* [[N_CASTED29]], align 4
-// CHECK11-NEXT:    [[TMP108:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP109:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP110:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP107]], i32* [[TMP112]], align 4
-// CHECK11-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP107]], i32* [[TMP114]], align 4
-// CHECK11-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP115]], align 4
-// CHECK11-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP108]], i32** [[TMP117]], align 4
-// CHECK11-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP108]], i32** [[TMP119]], align 4
-// CHECK11-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP120]], align 4
-// CHECK11-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP109]], i32** [[TMP122]], align 4
-// CHECK11-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP109]], i32** [[TMP124]], align 4
-// CHECK11-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK11-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP110]], i32** [[TMP127]], align 4
-// CHECK11-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP110]], i32** [[TMP129]], align 4
-// CHECK11-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS32]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK11-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS30]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS31]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR_34]], align 4
-// CHECK11-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4
-// CHECK11-NEXT:    [[SUB36:%.*]] = sub nsw i32 [[TMP134]], 0
-// CHECK11-NEXT:    [[DIV37:%.*]] = sdiv i32 [[SUB36]], 1
-// CHECK11-NEXT:    [[SUB38:%.*]] = sub nsw i32 [[DIV37]], 1
-// CHECK11-NEXT:    store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_35]], align 4
-// CHECK11-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_35]], align 4
-// CHECK11-NEXT:    [[ADD39:%.*]] = add nsw i32 [[TMP135]], 1
-// CHECK11-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD39]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP136]])
-// CHECK11-NEXT:    [[TMP137:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l66.region_id, i32 4, i8** [[TMP131]], i8** [[TMP132]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.40, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.41, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP138:%.*]] = icmp ne i32 [[TMP137]], 0
-// CHECK11-NEXT:    br i1 [[TMP138]], label [[OMP_OFFLOAD_FAILED40:%.*]], label [[OMP_OFFLOAD_CONT41:%.*]]
-// CHECK11:       omp_offload.failed40:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l66(i32 [[TMP107]], i32* [[TMP108]], i32* [[TMP109]], i32* [[TMP110]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT41]]
-// CHECK11:       omp_offload.cont41:
-// CHECK11-NEXT:    [[TMP139:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP139]], i32* [[CH_CASTED42]], align 4
-// CHECK11-NEXT:    [[TMP140:%.*]] = load i32, i32* [[CH_CASTED42]], align 4
-// CHECK11-NEXT:    [[TMP141:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP141]], i32* [[N_CASTED43]], align 4
-// CHECK11-NEXT:    [[TMP142:%.*]] = load i32, i32* [[N_CASTED43]], align 4
-// CHECK11-NEXT:    [[TMP143:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP144:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP145:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP140]], i32* [[TMP147]], align 4
-// CHECK11-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP140]], i32* [[TMP149]], align 4
-// CHECK11-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP150]], align 4
-// CHECK11-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP142]], i32* [[TMP152]], align 4
-// CHECK11-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP142]], i32* [[TMP154]], align 4
-// CHECK11-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP155]], align 4
-// CHECK11-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP143]], i32** [[TMP157]], align 4
-// CHECK11-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP143]], i32** [[TMP159]], align 4
-// CHECK11-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP160]], align 4
-// CHECK11-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP144]], i32** [[TMP162]], align 4
-// CHECK11-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP144]], i32** [[TMP164]], align 4
-// CHECK11-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK11-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP167:%.*]] = bitcast i8** [[TMP166]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP145]], i32** [[TMP167]], align 4
-// CHECK11-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP169:%.*]] = bitcast i8** [[TMP168]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP145]], i32** [[TMP169]], align 4
-// CHECK11-NEXT:    [[TMP170:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS46]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP170]], align 4
-// CHECK11-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS44]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS45]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP70]], 1
+// CHECK11-NEXT:    [[TMP71:%.*]] = zext i32 [[ADD13]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP71]])
+// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP72]], align 4
+// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP66]], i8*** [[TMP74]], align 4
+// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP67]], i8*** [[TMP75]], align 4
+// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.32, i32 0, i32 0), i64** [[TMP76]], align 4
+// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.33, i32 0, i32 0), i64** [[TMP77]], align 4
+// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP78]], align 4
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP79]], align 4
+// CHECK11-NEXT:    [[TMP80:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS14]])
+// CHECK11-NEXT:    [[TMP81:%.*]] = icmp ne i32 [[TMP80]], 0
+// CHECK11-NEXT:    br i1 [[TMP81]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11:       omp_offload.failed15:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50(i32 [[TMP42]], i32* [[TMP43]], i32* [[TMP44]], i32* [[TMP45]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
+// CHECK11:       omp_offload.cont16:
+// CHECK11-NEXT:    [[TMP82:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP82]], i32* [[CH_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP83:%.*]] = load i32, i32* [[CH_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP84]], i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP85:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP86:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP87:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP88:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP83]], i32* [[TMP90]], align 4
+// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP83]], i32* [[TMP92]], align 4
+// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP93]], align 4
+// CHECK11-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP85]], i32* [[TMP95]], align 4
+// CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP85]], i32* [[TMP97]], align 4
+// CHECK11-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP98]], align 4
+// CHECK11-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP86]], i32** [[TMP100]], align 4
+// CHECK11-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP86]], i32** [[TMP102]], align 4
+// CHECK11-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP103]], align 4
+// CHECK11-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP87]], i32** [[TMP105]], align 4
+// CHECK11-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP87]], i32** [[TMP107]], align 4
+// CHECK11-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK11-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP88]], i32** [[TMP110]], align 4
+// CHECK11-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP88]], i32** [[TMP112]], align 4
+// CHECK11-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK11-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP116:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP116]], i32* [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK11-NEXT:    [[TMP117:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK11-NEXT:    [[SUB24:%.*]] = sub nsw i32 [[TMP117]], 0
+// CHECK11-NEXT:    [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
+// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[DIV25]], 1
+// CHECK11-NEXT:    store i32 [[SUB26]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[TMP118:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP118]], 1
+// CHECK11-NEXT:    [[TMP119:%.*]] = zext i32 [[ADD27]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP119]])
+// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP120]], align 4
+// CHECK11-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP121]], align 4
+// CHECK11-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP114]], i8*** [[TMP122]], align 4
+// CHECK11-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP115]], i8*** [[TMP123]], align 4
+// CHECK11-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.36, i32 0, i32 0), i64** [[TMP124]], align 4
+// CHECK11-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.37, i32 0, i32 0), i64** [[TMP125]], align 4
+// CHECK11-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP126]], align 4
+// CHECK11-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP127]], align 4
+// CHECK11-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l58.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS28]])
+// CHECK11-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK11-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK11:       omp_offload.failed29:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l58(i32 [[TMP83]], i32 [[TMP85]], i32* [[TMP86]], i32* [[TMP87]], i32* [[TMP88]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
+// CHECK11:       omp_offload.cont30:
+// CHECK11-NEXT:    [[TMP130:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP130]], i32* [[N_CASTED31]], align 4
+// CHECK11-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK11-NEXT:    [[TMP132:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP133:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP134:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP131]], i32* [[TMP136]], align 4
+// CHECK11-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP131]], i32* [[TMP138]], align 4
+// CHECK11-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP139]], align 4
+// CHECK11-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP132]], i32** [[TMP141]], align 4
+// CHECK11-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP132]], i32** [[TMP143]], align 4
+// CHECK11-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP144]], align 4
+// CHECK11-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP133]], i32** [[TMP146]], align 4
+// CHECK11-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP133]], i32** [[TMP148]], align 4
+// CHECK11-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP149]], align 4
+// CHECK11-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP134]], i32** [[TMP151]], align 4
+// CHECK11-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP134]], i32** [[TMP153]], align 4
+// CHECK11-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP154]], align 4
+// CHECK11-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP157:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP157]], i32* [[DOTCAPTURE_EXPR_36]], align 4
+// CHECK11-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_36]], align 4
+// CHECK11-NEXT:    [[SUB38:%.*]] = sub nsw i32 [[TMP158]], 0
+// CHECK11-NEXT:    [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1
+// CHECK11-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[DIV39]], 1
+// CHECK11-NEXT:    store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK11-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK11-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP159]], 1
+// CHECK11-NEXT:    [[TMP160:%.*]] = zext i32 [[ADD41]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP160]])
+// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP161]], align 4
+// CHECK11-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP162]], align 4
+// CHECK11-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP155]], i8*** [[TMP163]], align 4
+// CHECK11-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP156]], i8*** [[TMP164]], align 4
+// CHECK11-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.40, i32 0, i32 0), i64** [[TMP165]], align 4
+// CHECK11-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.41, i32 0, i32 0), i64** [[TMP166]], align 4
+// CHECK11-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP167]], align 4
+// CHECK11-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP168]], align 4
+// CHECK11-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l66.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS42]])
+// CHECK11-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
+// CHECK11-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK11:       omp_offload.failed43:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l66(i32 [[TMP131]], i32* [[TMP132]], i32* [[TMP133]], i32* [[TMP134]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
+// CHECK11:       omp_offload.cont44:
+// CHECK11-NEXT:    [[TMP171:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP171]], i32* [[CH_CASTED45]], align 4
+// CHECK11-NEXT:    [[TMP172:%.*]] = load i32, i32* [[CH_CASTED45]], align 4
 // CHECK11-NEXT:    [[TMP173:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP173]], i32* [[DOTCAPTURE_EXPR_48]], align 4
-// CHECK11-NEXT:    [[TMP174:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_48]], align 4
-// CHECK11-NEXT:    [[SUB50:%.*]] = sub nsw i32 [[TMP174]], 0
-// CHECK11-NEXT:    [[DIV51:%.*]] = sdiv i32 [[SUB50]], 1
-// CHECK11-NEXT:    [[SUB52:%.*]] = sub nsw i32 [[DIV51]], 1
-// CHECK11-NEXT:    store i32 [[SUB52]], i32* [[DOTCAPTURE_EXPR_49]], align 4
-// CHECK11-NEXT:    [[TMP175:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_49]], align 4
-// CHECK11-NEXT:    [[ADD53:%.*]] = add nsw i32 [[TMP175]], 1
-// CHECK11-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD53]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP176]])
-// CHECK11-NEXT:    [[TMP177:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l74.region_id, i32 5, i8** [[TMP171]], i8** [[TMP172]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.44, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.45, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP178:%.*]] = icmp ne i32 [[TMP177]], 0
-// CHECK11-NEXT:    br i1 [[TMP178]], label [[OMP_OFFLOAD_FAILED54:%.*]], label [[OMP_OFFLOAD_CONT55:%.*]]
-// CHECK11:       omp_offload.failed54:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l74(i32 [[TMP140]], i32 [[TMP142]], i32* [[TMP143]], i32* [[TMP144]], i32* [[TMP145]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT55]]
-// CHECK11:       omp_offload.cont55:
-// CHECK11-NEXT:    [[TMP179:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP179]], i32* [[N_CASTED56]], align 4
-// CHECK11-NEXT:    [[TMP180:%.*]] = load i32, i32* [[N_CASTED56]], align 4
-// CHECK11-NEXT:    [[TMP181:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP182:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP183:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP184:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP185:%.*]] = bitcast i8** [[TMP184]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP180]], i32* [[TMP185]], align 4
-// CHECK11-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP187:%.*]] = bitcast i8** [[TMP186]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP180]], i32* [[TMP187]], align 4
-// CHECK11-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP188]], align 4
-// CHECK11-NEXT:    [[TMP189:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP190:%.*]] = bitcast i8** [[TMP189]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP181]], i32** [[TMP190]], align 4
-// CHECK11-NEXT:    [[TMP191:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP192:%.*]] = bitcast i8** [[TMP191]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP181]], i32** [[TMP192]], align 4
-// CHECK11-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP193]], align 4
-// CHECK11-NEXT:    [[TMP194:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP195:%.*]] = bitcast i8** [[TMP194]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP182]], i32** [[TMP195]], align 4
-// CHECK11-NEXT:    [[TMP196:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP197:%.*]] = bitcast i8** [[TMP196]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP182]], i32** [[TMP197]], align 4
-// CHECK11-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP198]], align 4
-// CHECK11-NEXT:    [[TMP199:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP200:%.*]] = bitcast i8** [[TMP199]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP183]], i32** [[TMP200]], align 4
-// CHECK11-NEXT:    [[TMP201:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP202:%.*]] = bitcast i8** [[TMP201]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP183]], i32** [[TMP202]], align 4
-// CHECK11-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS59]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP203]], align 4
-// CHECK11-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP205:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP206:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP206]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK11-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK11-NEXT:    [[SUB63:%.*]] = sub nsw i32 [[TMP207]], 0
-// CHECK11-NEXT:    [[DIV64:%.*]] = sdiv i32 [[SUB63]], 1
-// CHECK11-NEXT:    [[SUB65:%.*]] = sub nsw i32 [[DIV64]], 1
-// CHECK11-NEXT:    store i32 [[SUB65]], i32* [[DOTCAPTURE_EXPR_62]], align 4
-// CHECK11-NEXT:    [[TMP208:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_62]], align 4
-// CHECK11-NEXT:    [[ADD66:%.*]] = add nsw i32 [[TMP208]], 1
-// CHECK11-NEXT:    [[TMP209:%.*]] = zext i32 [[ADD66]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP209]])
-// CHECK11-NEXT:    [[TMP210:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l82.region_id, i32 4, i8** [[TMP204]], i8** [[TMP205]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.48, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.49, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP211:%.*]] = icmp ne i32 [[TMP210]], 0
-// CHECK11-NEXT:    br i1 [[TMP211]], label [[OMP_OFFLOAD_FAILED67:%.*]], label [[OMP_OFFLOAD_CONT68:%.*]]
-// CHECK11:       omp_offload.failed67:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l82(i32 [[TMP180]], i32* [[TMP181]], i32* [[TMP182]], i32* [[TMP183]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT68]]
-// CHECK11:       omp_offload.cont68:
-// CHECK11-NEXT:    [[TMP212:%.*]] = load i32, i32* [[CH]], align 4
-// CHECK11-NEXT:    store i32 [[TMP212]], i32* [[CH_CASTED69]], align 4
-// CHECK11-NEXT:    [[TMP213:%.*]] = load i32, i32* [[CH_CASTED69]], align 4
-// CHECK11-NEXT:    [[TMP214:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP214]], i32* [[N_CASTED70]], align 4
-// CHECK11-NEXT:    [[TMP215:%.*]] = load i32, i32* [[N_CASTED70]], align 4
-// CHECK11-NEXT:    [[TMP216:%.*]] = load i32*, i32** [[A]], align 4
-// CHECK11-NEXT:    [[TMP217:%.*]] = load i32*, i32** [[B]], align 4
-// CHECK11-NEXT:    [[TMP218:%.*]] = load i32*, i32** [[C]], align 4
-// CHECK11-NEXT:    [[TMP219:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP220:%.*]] = bitcast i8** [[TMP219]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP213]], i32* [[TMP220]], align 4
-// CHECK11-NEXT:    [[TMP221:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP222:%.*]] = bitcast i8** [[TMP221]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP213]], i32* [[TMP222]], align 4
-// CHECK11-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP223]], align 4
-// CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP173]], i32* [[N_CASTED46]], align 4
+// CHECK11-NEXT:    [[TMP174:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK11-NEXT:    [[TMP175:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP176:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP177:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP179:%.*]] = bitcast i8** [[TMP178]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP172]], i32* [[TMP179]], align 4
+// CHECK11-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP181:%.*]] = bitcast i8** [[TMP180]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP172]], i32* [[TMP181]], align 4
+// CHECK11-NEXT:    [[TMP182:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP182]], align 4
+// CHECK11-NEXT:    [[TMP183:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP184:%.*]] = bitcast i8** [[TMP183]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP174]], i32* [[TMP184]], align 4
+// CHECK11-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP186:%.*]] = bitcast i8** [[TMP185]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP174]], i32* [[TMP186]], align 4
+// CHECK11-NEXT:    [[TMP187:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP187]], align 4
+// CHECK11-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP189:%.*]] = bitcast i8** [[TMP188]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP175]], i32** [[TMP189]], align 4
+// CHECK11-NEXT:    [[TMP190:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP191:%.*]] = bitcast i8** [[TMP190]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP175]], i32** [[TMP191]], align 4
+// CHECK11-NEXT:    [[TMP192:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP192]], align 4
+// CHECK11-NEXT:    [[TMP193:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP194:%.*]] = bitcast i8** [[TMP193]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP176]], i32** [[TMP194]], align 4
+// CHECK11-NEXT:    [[TMP195:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP196:%.*]] = bitcast i8** [[TMP195]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP176]], i32** [[TMP196]], align 4
+// CHECK11-NEXT:    [[TMP197:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP197]], align 4
+// CHECK11-NEXT:    [[TMP198:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP199:%.*]] = bitcast i8** [[TMP198]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP177]], i32** [[TMP199]], align 4
+// CHECK11-NEXT:    [[TMP200:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP201:%.*]] = bitcast i8** [[TMP200]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP177]], i32** [[TMP201]], align 4
+// CHECK11-NEXT:    [[TMP202:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP202]], align 4
+// CHECK11-NEXT:    [[TMP203:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP204:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP205:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP205]], i32* [[DOTCAPTURE_EXPR_51]], align 4
+// CHECK11-NEXT:    [[TMP206:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_51]], align 4
+// CHECK11-NEXT:    [[SUB53:%.*]] = sub nsw i32 [[TMP206]], 0
+// CHECK11-NEXT:    [[DIV54:%.*]] = sdiv i32 [[SUB53]], 1
+// CHECK11-NEXT:    [[SUB55:%.*]] = sub nsw i32 [[DIV54]], 1
+// CHECK11-NEXT:    store i32 [[SUB55]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK11-NEXT:    [[TMP207:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK11-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP207]], 1
+// CHECK11-NEXT:    [[TMP208:%.*]] = zext i32 [[ADD56]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP208]])
+// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP209:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP209]], align 4
+// CHECK11-NEXT:    [[TMP210:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP210]], align 4
+// CHECK11-NEXT:    [[TMP211:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP203]], i8*** [[TMP211]], align 4
+// CHECK11-NEXT:    [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP204]], i8*** [[TMP212]], align 4
+// CHECK11-NEXT:    [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.44, i32 0, i32 0), i64** [[TMP213]], align 4
+// CHECK11-NEXT:    [[TMP214:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.45, i32 0, i32 0), i64** [[TMP214]], align 4
+// CHECK11-NEXT:    [[TMP215:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP215]], align 4
+// CHECK11-NEXT:    [[TMP216:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP216]], align 4
+// CHECK11-NEXT:    [[TMP217:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l74.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS57]])
+// CHECK11-NEXT:    [[TMP218:%.*]] = icmp ne i32 [[TMP217]], 0
+// CHECK11-NEXT:    br i1 [[TMP218]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK11:       omp_offload.failed58:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l74(i32 [[TMP172]], i32 [[TMP174]], i32* [[TMP175]], i32* [[TMP176]], i32* [[TMP177]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
+// CHECK11:       omp_offload.cont59:
+// CHECK11-NEXT:    [[TMP219:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP219]], i32* [[N_CASTED60]], align 4
+// CHECK11-NEXT:    [[TMP220:%.*]] = load i32, i32* [[N_CASTED60]], align 4
+// CHECK11-NEXT:    [[TMP221:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP222:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP223:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP225:%.*]] = bitcast i8** [[TMP224]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP215]], i32* [[TMP225]], align 4
-// CHECK11-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP220]], i32* [[TMP225]], align 4
+// CHECK11-NEXT:    [[TMP226:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP227:%.*]] = bitcast i8** [[TMP226]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP215]], i32* [[TMP227]], align 4
-// CHECK11-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 [[TMP220]], i32* [[TMP227]], align 4
+// CHECK11-NEXT:    [[TMP228:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP228]], align 4
-// CHECK11-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP229:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP230:%.*]] = bitcast i8** [[TMP229]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP216]], i32** [[TMP230]], align 4
-// CHECK11-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 2
+// CHECK11-NEXT:    store i32* [[TMP221]], i32** [[TMP230]], align 4
+// CHECK11-NEXT:    [[TMP231:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP232:%.*]] = bitcast i8** [[TMP231]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP216]], i32** [[TMP232]], align 4
-// CHECK11-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 2
+// CHECK11-NEXT:    store i32* [[TMP221]], i32** [[TMP232]], align 4
+// CHECK11-NEXT:    [[TMP233:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 1
 // CHECK11-NEXT:    store i8* null, i8** [[TMP233]], align 4
-// CHECK11-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP234:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP235:%.*]] = bitcast i8** [[TMP234]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP217]], i32** [[TMP235]], align 4
-// CHECK11-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 3
+// CHECK11-NEXT:    store i32* [[TMP222]], i32** [[TMP235]], align 4
+// CHECK11-NEXT:    [[TMP236:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP237:%.*]] = bitcast i8** [[TMP236]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP217]], i32** [[TMP237]], align 4
-// CHECK11-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 3
+// CHECK11-NEXT:    store i32* [[TMP222]], i32** [[TMP237]], align 4
+// CHECK11-NEXT:    [[TMP238:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 2
 // CHECK11-NEXT:    store i8* null, i8** [[TMP238]], align 4
-// CHECK11-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP239:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP240:%.*]] = bitcast i8** [[TMP239]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP218]], i32** [[TMP240]], align 4
-// CHECK11-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 4
+// CHECK11-NEXT:    store i32* [[TMP223]], i32** [[TMP240]], align 4
+// CHECK11-NEXT:    [[TMP241:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP242:%.*]] = bitcast i8** [[TMP241]] to i32**
-// CHECK11-NEXT:    store i32* [[TMP218]], i32** [[TMP242]], align 4
-// CHECK11-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS73]], i32 0, i32 4
+// CHECK11-NEXT:    store i32* [[TMP223]], i32** [[TMP242]], align 4
+// CHECK11-NEXT:    [[TMP243:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS63]], i32 0, i32 3
 // CHECK11-NEXT:    store i8* null, i8** [[TMP243]], align 4
-// CHECK11-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS71]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS72]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP244:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS61]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP245:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS62]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP246:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_75]], align 4
-// CHECK11-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_75]], align 4
-// CHECK11-NEXT:    [[SUB77:%.*]] = sub nsw i32 [[TMP247]], 0
-// CHECK11-NEXT:    [[DIV78:%.*]] = sdiv i32 [[SUB77]], 1
-// CHECK11-NEXT:    [[SUB79:%.*]] = sub nsw i32 [[DIV78]], 1
-// CHECK11-NEXT:    store i32 [[SUB79]], i32* [[DOTCAPTURE_EXPR_76]], align 4
-// CHECK11-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_76]], align 4
-// CHECK11-NEXT:    [[ADD80:%.*]] = add nsw i32 [[TMP248]], 1
-// CHECK11-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD80]] to i64
+// CHECK11-NEXT:    store i32 [[TMP246]], i32* [[DOTCAPTURE_EXPR_65]], align 4
+// CHECK11-NEXT:    [[TMP247:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_65]], align 4
+// CHECK11-NEXT:    [[SUB67:%.*]] = sub nsw i32 [[TMP247]], 0
+// CHECK11-NEXT:    [[DIV68:%.*]] = sdiv i32 [[SUB67]], 1
+// CHECK11-NEXT:    [[SUB69:%.*]] = sub nsw i32 [[DIV68]], 1
+// CHECK11-NEXT:    store i32 [[SUB69]], i32* [[DOTCAPTURE_EXPR_66]], align 4
+// CHECK11-NEXT:    [[TMP248:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_66]], align 4
+// CHECK11-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP248]], 1
+// CHECK11-NEXT:    [[TMP249:%.*]] = zext i32 [[ADD70]] to i64
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP249]])
-// CHECK11-NEXT:    [[TMP250:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l90.region_id, i32 5, i8** [[TMP244]], i8** [[TMP245]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.52, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.53, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP251:%.*]] = icmp ne i32 [[TMP250]], 0
-// CHECK11-NEXT:    br i1 [[TMP251]], label [[OMP_OFFLOAD_FAILED81:%.*]], label [[OMP_OFFLOAD_CONT82:%.*]]
-// CHECK11:       omp_offload.failed81:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l90(i32 [[TMP213]], i32 [[TMP215]], i32* [[TMP216]], i32* [[TMP217]], i32* [[TMP218]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT82]]
-// CHECK11:       omp_offload.cont82:
+// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP250:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP250]], align 4
+// CHECK11-NEXT:    [[TMP251:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 4, i32* [[TMP251]], align 4
+// CHECK11-NEXT:    [[TMP252:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP244]], i8*** [[TMP252]], align 4
+// CHECK11-NEXT:    [[TMP253:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP245]], i8*** [[TMP253]], align 4
+// CHECK11-NEXT:    [[TMP254:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.48, i32 0, i32 0), i64** [[TMP254]], align 4
+// CHECK11-NEXT:    [[TMP255:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.49, i32 0, i32 0), i64** [[TMP255]], align 4
+// CHECK11-NEXT:    [[TMP256:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP256]], align 4
+// CHECK11-NEXT:    [[TMP257:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP257]], align 4
+// CHECK11-NEXT:    [[TMP258:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l82.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS71]])
+// CHECK11-NEXT:    [[TMP259:%.*]] = icmp ne i32 [[TMP258]], 0
+// CHECK11-NEXT:    br i1 [[TMP259]], label [[OMP_OFFLOAD_FAILED72:%.*]], label [[OMP_OFFLOAD_CONT73:%.*]]
+// CHECK11:       omp_offload.failed72:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l82(i32 [[TMP220]], i32* [[TMP221]], i32* [[TMP222]], i32* [[TMP223]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT73]]
+// CHECK11:       omp_offload.cont73:
+// CHECK11-NEXT:    [[TMP260:%.*]] = load i32, i32* [[CH]], align 4
+// CHECK11-NEXT:    store i32 [[TMP260]], i32* [[CH_CASTED74]], align 4
+// CHECK11-NEXT:    [[TMP261:%.*]] = load i32, i32* [[CH_CASTED74]], align 4
+// CHECK11-NEXT:    [[TMP262:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP262]], i32* [[N_CASTED75]], align 4
+// CHECK11-NEXT:    [[TMP263:%.*]] = load i32, i32* [[N_CASTED75]], align 4
+// CHECK11-NEXT:    [[TMP264:%.*]] = load i32*, i32** [[A]], align 4
+// CHECK11-NEXT:    [[TMP265:%.*]] = load i32*, i32** [[B]], align 4
+// CHECK11-NEXT:    [[TMP266:%.*]] = load i32*, i32** [[C]], align 4
+// CHECK11-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP268:%.*]] = bitcast i8** [[TMP267]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP261]], i32* [[TMP268]], align 4
+// CHECK11-NEXT:    [[TMP269:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP270:%.*]] = bitcast i8** [[TMP269]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP261]], i32* [[TMP270]], align 4
+// CHECK11-NEXT:    [[TMP271:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP271]], align 4
+// CHECK11-NEXT:    [[TMP272:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP273:%.*]] = bitcast i8** [[TMP272]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP263]], i32* [[TMP273]], align 4
+// CHECK11-NEXT:    [[TMP274:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP275:%.*]] = bitcast i8** [[TMP274]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP263]], i32* [[TMP275]], align 4
+// CHECK11-NEXT:    [[TMP276:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP276]], align 4
+// CHECK11-NEXT:    [[TMP277:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP278:%.*]] = bitcast i8** [[TMP277]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP264]], i32** [[TMP278]], align 4
+// CHECK11-NEXT:    [[TMP279:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP280:%.*]] = bitcast i8** [[TMP279]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP264]], i32** [[TMP280]], align 4
+// CHECK11-NEXT:    [[TMP281:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP281]], align 4
+// CHECK11-NEXT:    [[TMP282:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP283:%.*]] = bitcast i8** [[TMP282]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP265]], i32** [[TMP283]], align 4
+// CHECK11-NEXT:    [[TMP284:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP285:%.*]] = bitcast i8** [[TMP284]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP265]], i32** [[TMP285]], align 4
+// CHECK11-NEXT:    [[TMP286:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP286]], align 4
+// CHECK11-NEXT:    [[TMP287:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP288:%.*]] = bitcast i8** [[TMP287]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP266]], i32** [[TMP288]], align 4
+// CHECK11-NEXT:    [[TMP289:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP290:%.*]] = bitcast i8** [[TMP289]] to i32**
+// CHECK11-NEXT:    store i32* [[TMP266]], i32** [[TMP290]], align 4
+// CHECK11-NEXT:    [[TMP291:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS78]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP291]], align 4
+// CHECK11-NEXT:    [[TMP292:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS76]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP293:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS77]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP294:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP294]], i32* [[DOTCAPTURE_EXPR_80]], align 4
+// CHECK11-NEXT:    [[TMP295:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_80]], align 4
+// CHECK11-NEXT:    [[SUB82:%.*]] = sub nsw i32 [[TMP295]], 0
+// CHECK11-NEXT:    [[DIV83:%.*]] = sdiv i32 [[SUB82]], 1
+// CHECK11-NEXT:    [[SUB84:%.*]] = sub nsw i32 [[DIV83]], 1
+// CHECK11-NEXT:    store i32 [[SUB84]], i32* [[DOTCAPTURE_EXPR_81]], align 4
+// CHECK11-NEXT:    [[TMP296:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_81]], align 4
+// CHECK11-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP296]], 1
+// CHECK11-NEXT:    [[TMP297:%.*]] = zext i32 [[ADD85]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP297]])
+// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[TMP298:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 0
+// CHECK11-NEXT:    store i32 1, i32* [[TMP298]], align 4
+// CHECK11-NEXT:    [[TMP299:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 1
+// CHECK11-NEXT:    store i32 5, i32* [[TMP299]], align 4
+// CHECK11-NEXT:    [[TMP300:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 2
+// CHECK11-NEXT:    store i8** [[TMP292]], i8*** [[TMP300]], align 4
+// CHECK11-NEXT:    [[TMP301:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 3
+// CHECK11-NEXT:    store i8** [[TMP293]], i8*** [[TMP301]], align 4
+// CHECK11-NEXT:    [[TMP302:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 4
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.52, i32 0, i32 0), i64** [[TMP302]], align 4
+// CHECK11-NEXT:    [[TMP303:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 5
+// CHECK11-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.53, i32 0, i32 0), i64** [[TMP303]], align 4
+// CHECK11-NEXT:    [[TMP304:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 6
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP304]], align 4
+// CHECK11-NEXT:    [[TMP305:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]], i32 0, i32 7
+// CHECK11-NEXT:    store i8** null, i8*** [[TMP305]], align 4
+// CHECK11-NEXT:    [[TMP306:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l90.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS86]])
+// CHECK11-NEXT:    [[TMP307:%.*]] = icmp ne i32 [[TMP306]], 0
+// CHECK11-NEXT:    br i1 [[TMP307]], label [[OMP_OFFLOAD_FAILED87:%.*]], label [[OMP_OFFLOAD_CONT88:%.*]]
+// CHECK11:       omp_offload.failed87:
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l90(i32 [[TMP261]], i32 [[TMP263]], i32* [[TMP264]], i32* [[TMP265]], i32* [[TMP266]]) #[[ATTR2]]
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT88]]
+// CHECK11:       omp_offload.cont88:
 // CHECK11-NEXT:    ret i32 0
 //
 //

diff  --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
index f58863ba18d18..e211678c5bfaf 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -931,9 +931,26 @@ int main() {
 // CHECK8-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK8-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
-// CHECK8-NEXT:    [[TMP36:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l138.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK8-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
-// CHECK8-NEXT:    br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK8-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK8-NEXT:    store i32 1, i32* [[TMP36]], align 4
+// CHECK8-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK8-NEXT:    store i32 5, i32* [[TMP37]], align 4
+// CHECK8-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK8-NEXT:    store i8** [[TMP34]], i8*** [[TMP38]], align 8
+// CHECK8-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK8-NEXT:    store i8** [[TMP35]], i8*** [[TMP39]], align 8
+// CHECK8-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK8-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP40]], align 8
+// CHECK8-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK8-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP41]], align 8
+// CHECK8-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK8-NEXT:    store i8** null, i8*** [[TMP42]], align 8
+// CHECK8-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK8-NEXT:    store i8** null, i8*** [[TMP43]], align 8
+// CHECK8-NEXT:    [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l138.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK8-NEXT:    [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK8-NEXT:    br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK8:       omp_offload.failed:
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l138(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP4]], i64 [[TMP6]]) #[[ATTR4:[0-9]+]]
 // CHECK8-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -941,18 +958,18 @@ int main() {
 // CHECK8-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
 // CHECK8-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK8-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
+// CHECK8-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
 // CHECK8-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK8:       arraydestroy.body:
-// CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK8-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK8-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK8-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK8:       arraydestroy.done3:
 // CHECK8-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK8-NEXT:    [[TMP39:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK8-NEXT:    ret i32 [[TMP39]]
+// CHECK8-NEXT:    [[TMP47:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK8-NEXT:    ret i32 [[TMP47]]
 //
 //
 // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -1377,27 +1394,44 @@ int main() {
 // CHECK8-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK8-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
-// CHECK8-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK8-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK8-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK8-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK8-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK8-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK8-NEXT:    store i32 4, i32* [[TMP30]], align 4
+// CHECK8-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK8-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 8
+// CHECK8-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK8-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 8
+// CHECK8-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK8-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64** [[TMP33]], align 8
+// CHECK8-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK8-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i64** [[TMP34]], align 8
+// CHECK8-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK8-NEXT:    store i8** null, i8*** [[TMP35]], align 8
+// CHECK8-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK8-NEXT:    store i8** null, i8*** [[TMP36]], align 8
+// CHECK8-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK8-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK8-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK8:       omp_offload.failed:
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]]
 // CHECK8-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK8:       omp_offload.cont:
 // CHECK8-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK8-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK8-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
 // CHECK8-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK8:       arraydestroy.body:
-// CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK8-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK8-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK8-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK8-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK8:       arraydestroy.done2:
 // CHECK8-NEXT:    call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK8-NEXT:    [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK8-NEXT:    ret i32 [[TMP32]]
+// CHECK8-NEXT:    [[TMP40:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK8-NEXT:    ret i32 [[TMP40]]
 //
 //
 // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev
@@ -1895,9 +1929,26 @@ int main() {
 // CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2)
-// CHECK10-NEXT:    [[TMP36:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l138.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0
-// CHECK10-NEXT:    br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK10-NEXT:    store i32 1, i32* [[TMP36]], align 4
+// CHECK10-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 5, i32* [[TMP37]], align 4
+// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK10-NEXT:    store i8** [[TMP34]], i8*** [[TMP38]], align 4
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK10-NEXT:    store i8** [[TMP35]], i8*** [[TMP39]], align 4
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK10-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP40]], align 4
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK10-NEXT:    store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP41]], align 4
+// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK10-NEXT:    store i8** null, i8*** [[TMP42]], align 4
+// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK10-NEXT:    store i8** null, i8*** [[TMP43]], align 4
+// CHECK10-NEXT:    [[TMP44:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l138.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK10-NEXT:    [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK10-NEXT:    br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l138(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP4]], i32 [[TMP6]]) #[[ATTR4:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1905,18 +1956,18 @@ int main() {
 // CHECK10-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
 // CHECK10-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
+// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2
 // CHECK10-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK10:       arraydestroy.body:
-// CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1
 // CHECK10-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK10-NEXT:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK10-NEXT:    br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK10:       arraydestroy.done2:
 // CHECK10-NEXT:    call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK10-NEXT:    [[TMP39:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP39]]
+// CHECK10-NEXT:    [[TMP47:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP47]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -2328,27 +2379,44 @@ int main() {
 // CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2)
-// CHECK10-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
-// CHECK10-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK10-NEXT:    store i32 1, i32* [[TMP29]], align 4
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 4, i32* [[TMP30]], align 4
+// CHECK10-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK10-NEXT:    store i8** [[TMP27]], i8*** [[TMP31]], align 4
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK10-NEXT:    store i8** [[TMP28]], i8*** [[TMP32]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK10-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.4, i32 0, i32 0), i64** [[TMP33]], align 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK10-NEXT:    store i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.5, i32 0, i32 0), i64** [[TMP34]], align 4
+// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK10-NEXT:    store i8** null, i8*** [[TMP35]], align 4
+// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK10-NEXT:    store i8** null, i8*** [[TMP36]], align 4
+// CHECK10-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 -1, i32 0, i32 1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK10-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK10-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
 // CHECK10-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK10-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2
 // CHECK10-NEXT:    br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK10:       arraydestroy.body:
-// CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK10-NEXT:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds